
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 2,
    'delayspec': 'ExtremeClogL1U23::markov(ord(15,1), ord(3,5,3,shift=22), 
[[124, 1], [1, 19]])'
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9997/1000000 [02:50<5:30:27, 49.93it/s]global step 10000, trans_decision ep_re 445.99574951021185

{"global_step": 10000, "eval_re": [125.26404514234831, 980.8545282487656, 
249.86231360866003, 447.91660294116116, 250.72722451604596, 325.17551368983874, 
943.897060856304, 201.0132240418614, 22.796535833955545, 912.450446223178], 
"eval_len": [210, 1000, 254, 269, 298, 375, 1000, 240, 124, 1000]}

  2%|▏         | 19995/1000000 [08:00<6:01:47, 45.15it/s]global step 20000, trans_decision ep_re 290.41041767824515

{"global_step": 20000, "eval_re": [-0.4777744729158716, 235.80660827993032, 
323.48066876851675, 295.63802481356566, 59.126119801401394, 368.0107929955756, 
761.3848438360998, 317.4540158345131, 188.61787218957738, 355.0630047361868], 
"eval_len": [11, 214, 163, 172, 198, 200, 815, 214, 175, 213]}

  3%|▎         | 29995/1000000 [12:57<5:20:03, 50.51it/s]global step 30000, trans_decision ep_re 420.20262153700753

{"global_step": 30000, "eval_re": [509.10377722834636, 467.1238999839013, 
337.5798178650273, 433.7078675736477, 378.0554134779337, 430.23767830467585, 
349.13679428763254, 477.2582819952141, 413.5176388998887, 406.30504575380746], 
"eval_len": [287, 261, 161, 235, 183, 238, 181, 254, 220, 201]}

  4%|▍         | 39999/1000000 [18:01<5:39:09, 47.18it/s]global step 40000, trans_decision ep_re 371.5344417917868

{"global_step": 40000, "eval_re": [382.98674088698647, 101.85366955299283, 
413.94089455445834, 335.210448657824, 560.0366608284766, 422.823014559887, 
261.5095059286175, 338.7605986398568, 433.09124038301877, 465.1316439257492], 
"eval_len": [172, 134, 205, 197, 259, 212, 119, 178, 228, 230]}

  5%|▍         | 49995/1000000 [23:20<5:48:38, 45.41it/s]global step 50000, trans_decision ep_re 264.84403463877766

{"global_step": 50000, "eval_re": [357.72226511161915, 468.61351642326116, 
513.463623284472, 50.241388923023955, 92.11128383294715, 454.0609755430122, 
301.7370740769607, 114.9010366305648, 18.559485962704105, 277.02969659921166], 
"eval_len": [211, 358, 275, 187, 117, 358, 157, 195, 39, 177]}

  6%|▌         | 59996/1000000 [28:06<7:31:27, 34.70it/s]global step 60000, trans_decision ep_re 440.4480122045372

{"global_step": 60000, "eval_re": [372.4733341698158, 710.2678937966785, 
18.785237107014993, 318.0006917228871, 333.80151966445413, 485.16754201751513, 
379.3880750105044, 484.6417852331373, 1163.3669119950266, 138.5871313283381], 
"eval_len": [256, 337, 39, 147, 242, 214, 195, 209, 515, 70]}

  7%|▋         | 69997/1000000 [33:10<5:14:52, 49.23it/s]global step 70000, trans_decision ep_re 436.1264678717956

{"global_step": 70000, "eval_re": [136.3064511730741, 3.0877644590935875, 
25.62293074214685, 549.2232803855296, 1382.1561643323466, 22.875305434754676, 
956.1502212451094, 364.57198525773657, 18.3596730193279, 902.9109026688366], 
"eval_len": [92, 16, 39, 238, 563, 35, 441, 213, 30, 400]}

  8%|▊         | 79999/1000000 [38:30<5:34:47, 45.80it/s]global step 80000, trans_decision ep_re 598.5493939690706

{"global_step": 80000, "eval_re": [209.27015575975884, 749.3980362571955, 
129.51925914904982, 263.22671480215996, 1091.1105368190008, 469.7997207138837, 
563.3805391767322, 227.4533105432296, 831.602102231733, 1450.7335642379624], 
"eval_len": [113, 313, 78, 166, 478, 217, 254, 157, 358, 548]}

  9%|▉         | 89999/1000000 [43:26<5:35:23, 45.22it/s]global step 90000, trans_decision ep_re 462.5278971298796

{"global_step": 90000, "eval_re": [784.4657484788671, 202.92709834048514, 
159.82981534335724, 741.8188610058693, 39.66089464360542, 697.5177203188451, 
451.8944691850925, 519.7150939665808, 50.81778552675402, 976.6314844893395], 
"eval_len": [315, 115, 107, 290, 48, 284, 227, 211, 56, 379]}

 10%|▉         | 99996/1000000 [48:50<6:39:33, 37.54it/s]global step 100000, trans_decision ep_re 1070.279079321298

{"global_step": 100000, "eval_re": [848.9751864982451, 1221.7303040965369, 
1032.406123143077, 1109.8703767503969, 890.7217109531855, 587.0421638110392, 
1592.0488042363847, 668.697631957935, 1234.096230308455, 1517.202261457725], 
"eval_len": [327, 444, 382, 389, 341, 254, 580, 272, 447, 510]}

 11%|█         | 109998/1000000 [54:10<5:23:36, 45.84it/s]global step 110000, trans_decision ep_re 935.9406053612211

{"global_step": 110000, "eval_re": [780.2118227739568, 828.0418050449349, 
479.4527887497544, 2416.9003608211333, 1208.292744966782, -2.2990953420559728, 
1344.9293919396468, 512.3590521677623, 1248.607727625506, 542.9094548647898], 
"eval_len": [314, 390, 216, 876, 468, 11, 503, 248, 442, 256]}

 12%|█▏        | 119998/1000000 [59:30<5:06:01, 47.93it/s]global step 120000, trans_decision ep_re 1439.3605892096514

{"global_step": 120000, "eval_re": [193.76093484303644, 2461.752885731226, 
823.3567481409651, 546.4134315338416, 3172.924373365764, 2548.3500231968565, 
2076.2823742072505, 431.91636593452296, 670.9339521445213, 1467.9148029985279], 
"eval_len": [100, 759, 323, 200, 1000, 814, 637, 185, 229, 487]}

 13%|█▎        | 129997/1000000 [1:04:40<6:25:10, 37.65it/s]global step 130000, trans_decision ep_re 1485.033515603399

{"global_step": 130000, "eval_re": [1148.9342285941166, 3372.8499769321406, 
2113.104982107047, 1602.8325238232037, 2893.614644752531, 2556.2965374971527, 
33.545109169165116, 16.87886592451125, 32.49987200363758, 1079.7784152304846], 
"eval_len": [384, 1000, 674, 492, 1000, 670, 34, 27, 39, 353]}

 14%|█▍        | 139997/1000000 [1:09:45<4:51:21, 49.20it/s]global step 140000, trans_decision ep_re 867.156205167115

{"global_step": 140000, "eval_re": [226.79078129703217, 503.7956234498176, 
224.6916792302174, 1656.8921776469158, 1229.2646199160886, 819.8938464099862, 
3427.1898520844675, 19.94285429626688, 35.571203346673016, 527.5294139936826], 
"eval_len": [113, 237, 115, 514, 408, 290, 1000, 28, 55, 214]}

 15%|█▍        | 149997/1000000 [1:15:00<5:00:02, 47.22it/s]global step 150000, trans_decision ep_re 1063.1985659135894

{"global_step": 150000, "eval_re": [2310.047679611273, 1495.807323192463, 
626.121800474022, 717.159698106748, 12.927730219046326, 981.2996019180202, 
863.9036751105359, 1150.9712412325757, 500.4863050403616, 1973.260604230848], 
"eval_len": [729, 474, 230, 267, 24, 342, 273, 351, 179, 700]}

 16%|█▌        | 159997/1000000 [1:19:55<4:56:14, 47.26it/s]global step 160000, trans_decision ep_re 855.798065872565

{"global_step": 160000, "eval_re": [975.4729108191434, 421.01721146032395, 
1104.5356437513021, 1952.5740988025227, 1036.3245834849786, 1971.7048011920253, 
142.39561334963162, 737.6084882208362, 199.18678241820987, 17.1605252266766], 
"eval_len": [315, 169, 348, 565, 338, 557, 103, 240, 123, 37]}

 17%|█▋        | 169996/1000000 [1:25:20<4:50:33, 47.61it/s]global step 170000, trans_decision ep_re 1156.7525842451464

{"global_step": 170000, "eval_re": [1974.5850021451959, 845.5256632063636, 
1741.9799641863917, 1419.506607109704, 1721.717379056151, 3143.2884695192465, 
29.90864017760866, 394.80697557640775, 262.83336072988715, 33.373780744506554], 
"eval_len": [561, 267, 498, 512, 497, 849, 54, 173, 125, 41]}

 18%|█▊        | 179999/1000000 [1:30:40<6:10:45, 36.86it/s]global step 180000, trans_decision ep_re 1669.2991730535928

{"global_step": 180000, "eval_re": [2559.0154713539127, 1794.2066499111013, 
3964.144910993433, 3813.306747702219, 1683.0353203795935, 243.15544330544384, 
1503.5746171579967, 323.4757312273253, 342.1325737575575, 466.94426474734547], 
"eval_len": [708, 495, 1000, 1000, 509, 140, 429, 144, 151, 181]}

 19%|█▉        | 189998/1000000 [1:36:00<6:13:39, 36.13it/s]global step 190000, trans_decision ep_re 905.8499497142986

{"global_step": 190000, "eval_re": [1878.001245743706, 822.2411377983856, 
943.1287073569334, 661.7586532101644, 16.113352886940948, 588.7602537142508, 
1609.6965872093201, 813.465492701559, 277.9754137842255, 1447.3586527375007], 
"eval_len": [520, 246, 332, 244, 26, 208, 447, 277, 130, 410]}

 20%|█▉        | 199997/1000000 [1:41:00<4:30:17, 49.33it/s]global step 200000, trans_decision ep_re 573.1118927607057

{"global_step": 200000, "eval_re": [197.05346931225264, 161.61565852683339, 
10.491948531826418, 906.8480215563791, 2092.3032945465243, 15.928738207859666, 
532.1553306701339, 1444.9443475454889, 371.05547497647547, -1.2773562667163179],
"eval_len": [92, 84, 23, 295, 593, 26, 210, 390, 196, 11]}

 21%|██        | 209997/1000000 [1:46:30<4:56:37, 44.39it/s]global step 210000, trans_decision ep_re 1608.4775626371495

{"global_step": 210000, "eval_re": [1734.2717625389057, 2871.946938795048, 
959.5967033725129, 1498.4273917322437, 3996.566393546015, 17.062864917784072, 
249.13814576592762, 3943.506690847257, 775.985649490034, 38.27308536576692], 
"eval_len": [529, 819, 279, 427, 1000, 29, 121, 1000, 262, 56]}

 22%|██▏       | 219997/1000000 [1:51:50<5:49:18, 37.22it/s]global step 220000, trans_decision ep_re 993.4701048025969

{"global_step": 220000, "eval_re": [27.846300812900413, 596.8436224883123, 
255.22686603184724, 3205.4938499339446, 228.89128014502836, 731.7674189995438, 
802.4424181395152, 156.76888247783563, 3705.8712252327755, 223.54918376426588], 
"eval_len": [39, 223, 128, 850, 115, 259, 288, 80, 1000, 110]}

 23%|██▎       | 229995/1000000 [1:56:50<4:12:24, 50.85it/s]global step 230000, trans_decision ep_re 1917.4921175618954

{"global_step": 230000, "eval_re": [360.67041334001806, 676.3951903805728, 
326.64515523956743, 4112.717748013154, 2149.6354838619463, 3958.3106289395323, 
2722.0201218198763, 2166.515027607178, 1269.7935012421262, 1432.2179051749824], 
"eval_len": [160, 233, 145, 1000, 543, 1000, 730, 564, 379, 405]}

 24%|██▍       | 239999/1000000 [2:02:00<4:12:38, 50.14it/s]global step 240000, trans_decision ep_re 1386.4181062795128

{"global_step": 240000, "eval_re": [2586.370955947237, 168.72118588190108, 
48.52124660178018, 294.7322295115326, 798.10105641787, 2032.8457997395294, 
4008.114550951675, 1647.3421778401887, 733.1809813111214, 1546.2508785922935], 
"eval_len": [671, 84, 116, 133, 261, 590, 1000, 439, 245, 419]}

 25%|██▍       | 249999/1000000 [2:07:10<4:22:30, 47.62it/s]global step 250000, trans_decision ep_re 1250.9227442437061

{"global_step": 250000, "eval_re": [28.13990528599465, 95.37337924222626, 
4183.2712853918965, 1235.015196171929, 30.7880649193696, 897.0336094344483, 
1265.5371790649776, 2120.0987243918435, 1294.2217413144356, 1359.7483572199405],
"eval_len": [50, 70, 1000, 366, 51, 319, 367, 575, 358, 409]}

 26%|██▌       | 259997/1000000 [2:12:00<4:03:45, 50.60it/s]global step 260000, trans_decision ep_re 746.2752067950908

{"global_step": 260000, "eval_re": [89.70340994761621, 596.6863713949936, 
465.15480708962474, 801.0534985493961, 687.5228751239665, 615.2160245575176, 
1757.8745030504792, 1677.0633690924806, 415.60281951335645, 356.874389631475], 
"eval_len": [173, 237, 167, 249, 225, 218, 472, 461, 169, 140]}

 27%|██▋       | 269995/1000000 [2:17:01<4:01:55, 50.29it/s]global step 270000, trans_decision ep_re 1745.278855967606

{"global_step": 270000, "eval_re": [2321.5627102896483, 1397.5091463741487, 
3226.161485883281, 1921.515789510175, 601.1424320325889, 3880.667973498435, 
178.27987921740078, 2039.0559435040143, 334.7823002818931, 1552.1108990844768], 
"eval_len": [593, 420, 784, 505, 209, 933, 90, 538, 134, 412]}

 28%|██▊       | 279997/1000000 [2:22:11<3:58:17, 50.36it/s]global step 280000, trans_decision ep_re 1977.8840435896448

{"global_step": 280000, "eval_re": [2698.883495168582, 331.9275439184219, 
1587.2714585477408, 1462.3119732867067, 3251.523676841068, 910.1313291612358, 
4208.478471343353, 355.04789522168545, 1441.241134569082, 3532.0234578385725], 
"eval_len": [705, 138, 427, 398, 760, 273, 975, 142, 394, 836]}

 29%|██▉       | 289997/1000000 [2:26:41<3:54:41, 50.42it/s]global step 290000, trans_decision ep_re 1893.667010663855

{"global_step": 290000, "eval_re": [4384.55912545655, 1211.9478223819845, 
131.35163384417532, 4240.115652040595, 2287.2906154363827, 1250.1478636469105, 
646.1679398696467, 1079.0233230130534, 19.18179209304315, 3686.8843388562104], 
"eval_len": [1000, 346, 83, 1000, 615, 358, 256, 341, 23, 868]}

 30%|██▉       | 299995/1000000 [2:31:21<3:51:58, 50.29it/s]global step 300000, trans_decision ep_re 2195.9180974737114

{"global_step": 300000, "eval_re": [1575.0998479631353, 1707.6327953545779, 
3577.942461962796, 813.9707471281654, 2927.1331802419745, 4249.231599929912, 
2182.1712515689346, 70.53084596283962, 3610.0320182030514, 1245.4362264217273], 
"eval_len": [412, 435, 888, 317, 749, 1000, 542, 77, 867, 381]}

 31%|███       | 309995/1000000 [2:35:43<3:48:12, 50.39it/s]global step 310000, trans_decision ep_re 1374.7236733458653

{"global_step": 310000, "eval_re": [1840.4339843682212, 1577.5670910596566, 
2664.108452101346, 876.3716313596881, 1276.674273415036, 1416.06685542479, 
1898.3365319181903, 998.4603812038348, 729.928540542931, 469.28899206496186], 
"eval_len": [478, 405, 656, 260, 352, 382, 503, 308, 219, 180]}

 32%|███▏      | 319996/1000000 [2:40:31<4:06:00, 46.07it/s]global step 320000, trans_decision ep_re 1137.823310187418

{"global_step": 320000, "eval_re": [1559.2623417948435, 40.267320060664375, 
760.9878838800275, 1144.2879889963178, 3819.823004088028, 733.7601780409091, 
2053.762065518951, 714.2220953484899, 21.92397549344388, 529.9362486525018], 
"eval_len": [400, 46, 239, 363, 930, 226, 545, 259, 32, 190]}

 33%|███▎      | 329999/1000000 [2:45:41<3:56:32, 47.21it/s]global step 330000, trans_decision ep_re 1284.0054848035757

{"global_step": 330000, "eval_re": [2042.8621514497192, 1749.5321224391796, 
2490.812412523745, 1289.3359975005624, 1005.1462013838645, 418.76027708376307, 
1153.6629782488424, 378.9570344208601, 1824.5480202960275, 486.4376526891934], 
"eval_len": [499, 441, 589, 338, 286, 153, 341, 145, 468, 170]}

 34%|███▍      | 339995/1000000 [2:50:42<3:46:29, 48.57it/s]global step 340000, trans_decision ep_re 1726.2209140080674

{"global_step": 340000, "eval_re": [3405.557383429445, 2589.0244044093774, 
987.2055369063013, 29.493097112635844, 1117.8786319157173, 291.88508676515227, 
401.10043047794346, 4547.83417878374, 3437.281660698576, 454.948729581785], 
"eval_len": [793, 608, 328, 41, 329, 126, 159, 1000, 811, 183]}

 35%|███▍      | 349996/1000000 [2:56:02<3:46:40, 47.79it/s]global step 350000, trans_decision ep_re 878.930109875944

{"global_step": 350000, "eval_re": [2521.871221242012, 670.6651590028189, 
1127.0936399602085, 766.653453619713, 767.5194017742841, 449.5206999441015, 
434.5490024618888, 1282.4010930942848, 357.3998688776624, 411.62755878246526], 
"eval_len": [617, 222, 314, 240, 256, 196, 157, 358, 151, 153]}

 36%|███▌      | 359997/1000000 [3:01:31<3:31:09, 50.52it/s]global step 360000, trans_decision ep_re 1190.3353532257572

{"global_step": 360000, "eval_re": [344.5220594243687, 1208.1289362126581, 
2072.5920616292096, 2521.4290476731485, 1496.734680000446, 605.4652503071173, 
2418.042001629277, 43.03436825755543, 1143.1300909174468, 50.275036206345355], 
"eval_len": [154, 380, 527, 632, 398, 215, 601, 41, 361, 50]}

 37%|███▋      | 369995/1000000 [3:06:14<3:47:26, 46.17it/s]global step 370000, trans_decision ep_re 761.6696589294213

{"global_step": 370000, "eval_re": [1379.137185796361, 76.57724313102696, 
30.46380810248323, 1747.2531948925948, 196.344333711982, 19.17974543037598, 
1009.1628247301278, 84.79029263402707, 2073.3591926759436, 1000.4287681892908], 
"eval_len": [404, 57, 44, 443, 107, 29, 341, 89, 640, 290]}

 38%|███▊      | 379998/1000000 [3:11:07<5:47:14, 29.76it/s]global step 380000, trans_decision ep_re 831.4468946630972

{"global_step": 380000, "eval_re": [471.31352246419283, 1009.6693971842103, 
102.62197880091327, 2380.100213644615, 1024.1303340456154, 2467.001929843841, 
15.05607044845465, 226.5315311914204, 146.03939531248503, 472.0045736952252], 
"eval_len": [187, 304, 71, 611, 309, 592, 22, 122, 81, 220]}

 39%|███▉      | 389998/1000000 [3:15:52<4:32:50, 37.26it/s]global step 390000, trans_decision ep_re 1353.2307200306273

{"global_step": 390000, "eval_re": [720.0305407718188, 961.13943771968, 
502.42498299974886, 911.2896925332715, 104.52593464668111, 2652.0935187723403, 
1230.3343927677006, 4193.319859614625, 2196.9710346149573, 60.177805865448605], 
"eval_len": [228, 294, 172, 279, 67, 614, 343, 897, 512, 58]}

 40%|███▉      | 399995/1000000 [3:21:08<3:18:54, 50.27it/s]global step 400000, trans_decision ep_re 600.6434752407782

{"global_step": 400000, "eval_re": [1072.391894011594, 81.24783440176718, 
114.25439462614594, 22.38367641805484, 2132.9019706035074, 288.34942739221924, 
78.44085079193815, 189.1404924549741, 303.8195157171031, 1723.5046959904773], 
"eval_len": [301, 61, 75, 37, 524, 136, 63, 101, 172, 463]}

 41%|████      | 409997/1000000 [3:26:21<4:29:02, 36.55it/s]global step 410000, trans_decision ep_re 1153.1453895405937

{"global_step": 410000, "eval_re": [1485.187428464248, 21.25388796903153, 
3050.3454259238033, 2013.5336238411514, 931.7680946186136, 1518.824388111488, 
17.094882856078662, 504.4193508515111, 614.7933941313056, 1374.2334186387054], 
"eval_len": [372, 32, 705, 509, 282, 397, 30, 212, 194, 341]}

 42%|████▏     | 419995/1000000 [3:31:24<3:10:24, 50.77it/s]global step 420000, trans_decision ep_re 498.7505110568224

{"global_step": 420000, "eval_re": [1184.4624770892478, 976.1315488754008, 
94.02130341120514, 286.947590909229, 63.818336456477645, 291.5232268744468, 
14.947985106404479, 716.4009177257425, 831.1674831830076, 528.0842409370629], 
"eval_len": [316, 270, 59, 120, 58, 117, 26, 222, 326, 170]}

 43%|████▎     | 429997/1000000 [3:35:47<3:06:42, 50.88it/s]global step 430000, trans_decision ep_re 936.3497160885572

{"global_step": 430000, "eval_re": [900.7644604438316, 302.9689238718659, 
2135.2257697935866, 267.41653831249613, 270.96870676381917, 289.95117398493153, 
28.85798061671664, 1394.237498502338, 1816.736290523691, 1956.3698180722945], 
"eval_len": [265, 150, 539, 126, 133, 118, 50, 417, 450, 488]}

 44%|████▍     | 439999/1000000 [3:40:15<3:04:55, 50.47it/s]global step 440000, trans_decision ep_re 664.5345912456906

{"global_step": 440000, "eval_re": [439.15377950729714, 337.385324127087, 
555.7363507077653, 1178.8516435537726, 1040.3467787829659, 111.3233740186848, 
764.3734058754795, 905.4808072726807, 511.06887386285905, 801.625574748315], 
"eval_len": [172, 139, 193, 382, 284, 86, 270, 282, 182, 238]}

 45%|████▍     | 449995/1000000 [3:44:51<3:00:42, 50.72it/s]global step 450000, trans_decision ep_re 1132.6547655091829

{"global_step": 450000, "eval_re": [245.39822198298958, 4028.6641823623927, 
21.817745419682712, 2128.9477240930432, 589.0534214943425, 1350.681449848056, 
1701.5097863640049, 201.39253803640162, 496.7600749707083, 562.3225105202054], 
"eval_len": [147, 999, 34, 561, 228, 457, 479, 113, 175, 201]}

 46%|████▌     | 459995/1000000 [3:49:08<2:57:31, 50.70it/s]global step 460000, trans_decision ep_re 639.7514320176417

{"global_step": 460000, "eval_re": [128.8337291601159, 604.2563060974386, 
86.26117148453679, 385.14769286761725, 873.6832290548606, 2625.533211607717, 
108.1952565943214, 1431.2716523645877, 26.715219294095178, 127.61685165112799], 
"eval_len": [76, 215, 70, 148, 267, 700, 79, 410, 39, 100]}

 47%|████▋     | 469997/1000000 [3:53:34<2:54:10, 50.72it/s]global step 470000, trans_decision ep_re 372.24465859646943

{"global_step": 470000, "eval_re": [20.218055803046422, 347.87074844166887, 
11.813459958404783, 566.7260961811161, 513.8777390581988, 290.56006793088784, 
201.61615822843382, 21.99519595274303, 800.4688720897392, 947.3001923204556], 
"eval_len": [24, 185, 24, 192, 193, 140, 126, 33, 231, 333]}

 48%|████▊     | 479997/1000000 [3:57:59<2:51:09, 50.63it/s]global step 480000, trans_decision ep_re 847.136972052673

{"global_step": 480000, "eval_re": [38.56408825620995, 257.45418634898436, 
3071.6482240939995, 142.9185934272473, 129.40264409096594, 271.2191751484544, 
3858.8460396160426, 426.36083995050313, 28.037003612373752, 246.9189259819489], 
"eval_len": [38, 118, 701, 79, 149, 130, 934, 188, 38, 115]}

 49%|████▉     | 489999/1000000 [4:02:26<2:47:51, 50.64it/s]global step 490000, trans_decision ep_re 634.7916165647724

{"global_step": 490000, "eval_re": [469.7877559059998, 361.6255035643543, 
20.558698457566436, 2199.6755284681485, 918.6985500889479, 36.656853042918314, 
831.7982385331117, 139.04150904933888, 1339.1654983381068, 30.90803019923173], 
"eval_len": [173, 129, 39, 521, 266, 56, 248, 78, 346, 41]}

 50%|████▉     | 499995/1000000 [4:07:02<2:45:45, 50.28it/s]global step 500000, trans_decision ep_re 562.2438577230845

{"global_step": 500000, "eval_re": [305.62190540269955, 648.5384164309737, 
885.5446992351236, 12.094974573780119, 129.96946017789054, 856.604965300832, 
82.99054486018078, 1822.5732790160087, 852.0372102650056, 26.463121968350364], 
"eval_len": [137, 210, 260, 24, 81, 232, 94, 446, 249, 29]}

 51%|█████     | 509995/1000000 [4:11:16<2:41:12, 50.66it/s]global step 510000, trans_decision ep_re 833.3165294075492

{"global_step": 510000, "eval_re": [23.79122598427073, 150.97611724169818, 
433.04915480387496, 18.369643027898995, 1777.6524951768058, 105.14894677723639, 
507.76601923165623, 3039.0892381754193, 1733.7567647430212, 543.5656889136092], 
"eval_len": [26, 81, 156, 23, 468, 57, 200, 669, 404, 196]}

 52%|█████▏    | 519997/1000000 [4:15:43<2:37:49, 50.69it/s]global step 520000, trans_decision ep_re 981.6962559159356

{"global_step": 520000, "eval_re": [155.61850859698956, 1600.654442347501, 
1907.861081536676, 97.91812332635605, 393.3699522382893, 1151.0386892018328, 
2680.535213250662, 218.66148456875743, 830.6287440413773, 780.6763200509146], 
"eval_len": [79, 406, 489, 216, 152, 316, 660, 155, 238, 268]}

 53%|█████▎    | 529997/1000000 [4:20:22<2:34:54, 50.57it/s]global step 530000, trans_decision ep_re 1828.122508836662

{"global_step": 530000, "eval_re": [737.7694105910634, 575.6317805427956, 
2330.6858512044623, -1.9779347510143321, 2585.358750514243, 3470.024217697457, 
4553.8663971830665, 156.21229873844342, 924.6625937372396, 2948.9917229088646], 
"eval_len": [291, 236, 576, 12, 590, 861, 1000, 77, 259, 743]}

 54%|█████▍    | 539999/1000000 [4:25:02<2:31:12, 50.70it/s]global step 540000, trans_decision ep_re 2756.7281738969605

{"global_step": 540000, "eval_re": [1713.1294964895083, 4308.80494699507, 
112.41493958142739, 4633.982210627171, 4653.970402459979, 4620.919610748512, 
2085.9439151422093, 304.2456808702308, 3294.3091745553443, 1839.5613615001514], 
"eval_len": [522, 1000, 92, 1000, 1000, 1000, 509, 140, 870, 463]}

 55%|█████▍    | 549995/1000000 [4:29:20<2:27:44, 50.76it/s]global step 550000, trans_decision ep_re 193.23425853067292

{"global_step": 550000, "eval_re": [101.71853868960402, 574.5168552340149, 
71.23780210500644, 95.99815595210217, 94.83629104034696, 104.98117002619024, 
102.59188310574699, 324.80698535514944, 96.10669733550087, 365.5482064630673], 
"eval_len": [61, 212, 101, 60, 63, 71, 69, 146, 55, 171]}

 56%|█████▌    | 559997/1000000 [4:33:42<2:24:30, 50.75it/s]global step 560000, trans_decision ep_re 16.553634549690273

{"global_step": 560000, "eval_re": [16.98333035627073, 14.364194066780684, 
13.976627686980965, 16.46028843718809, 16.806570392318314, 18.60113633262212, 
16.401117888188764, 19.545726611133848, 14.475253183052192, 17.92210054236701], 
"eval_len": [21, 20, 21, 21, 21, 22, 21, 22, 20, 21]}

 57%|█████▋    | 569997/1000000 [4:38:12<2:21:34, 50.62it/s]global step 570000, trans_decision ep_re 1019.6198280871922

{"global_step": 570000, "eval_re": [59.17518117202531, 323.0722117559536, 
193.62329363054252, 30.486708566119905, 1604.5925295096838, 292.2754567980195, 
1626.5381044660348, 1168.860229365161, 22.305780678602954, 4875.26878492978], 
"eval_len": [56, 133, 110, 44, 386, 206, 384, 295, 35, 1000]}

 58%|█████▊    | 579995/1000000 [4:42:42<2:17:08, 51.04it/s]global step 580000, trans_decision ep_re 1544.526056229741

{"global_step": 580000, "eval_re": [1982.191838962435, 3112.269397956268, 
4726.643984188196, 18.208947021063874, 1031.9385893287263, 1462.1921866147065, 
1171.5210823065725, 1410.3205432032153, 19.602485317331045, 510.3715073988947], 
"eval_len": [495, 676, 1000, 22, 300, 423, 368, 385, 31, 246]}

 59%|█████▉    | 589999/1000000 [4:47:12<2:15:08, 50.56it/s]global step 590000, trans_decision ep_re 1607.3945002606558

{"global_step": 590000, "eval_re": [1918.5065225559888, 4803.194950758238, 
1141.4715471327859, 3727.132234559896, 299.6060044544011, 141.98097596096423, 
2212.377517826462, 329.53652427248346, 25.065711494538807, 1475.073013590802], 
"eval_len": [488, 1000, 324, 1000, 159, 76, 497, 135, 35, 417]}

 60%|█████▉    | 599995/1000000 [4:51:26<2:11:14, 50.80it/s]global step 600000, trans_decision ep_re 1198.9413002224662

{"global_step": 600000, "eval_re": [1198.6754135729227, 985.5173774899012, 
686.9674764442404, 187.99937424599983, 1830.199429452662, 527.3445612602166, 
115.2782352853732, 2364.8445223910676, 2468.3811890478546, 1624.205423034424], 
"eval_len": [329, 290, 212, 100, 452, 178, 77, 538, 636, 422]}

 61%|██████    | 609997/1000000 [4:55:54<2:07:48, 50.86it/s]global step 610000, trans_decision ep_re 1693.3850980178727

{"global_step": 610000, "eval_re": [2264.781162494628, 13.868460770308939, 
1850.3581156385035, 21.036025799779328, 2727.0968040903726, 170.32252963872443, 
4817.218470697199, 12.298851241849269, 3760.756339253027, 1296.1142205543356], 
"eval_len": [558, 27, 442, 32, 710, 107, 1000, 143, 817, 332]}

 62%|██████▏   | 619999/1000000 [5:00:23<2:05:03, 50.64it/s]global step 620000, trans_decision ep_re 1000.684098234937

{"global_step": 620000, "eval_re": [1073.5065985045724, 1856.0471754109808, 
363.1954228619531, 1734.0265759878148, 1061.6905585278723, 186.6030856230011, 
2462.5684473505507, 1110.1368127533624, 130.4765534680566, 28.589751861204526], 
"eval_len": [306, 447, 152, 453, 289, 113, 648, 292, 76, 43]}

 63%|██████▎   | 629995/1000000 [5:05:02<2:01:40, 50.68it/s]global step 630000, trans_decision ep_re 1071.5002951963038

{"global_step": 630000, "eval_re": [23.939768134868523, 460.2914218495728, 
401.1269089438638, 222.15549119033167, 786.8669748947141, 1999.2510663168753, 
3555.089816697085, 554.5155310671979, 1712.3922156487404, 999.3737572197876], 
"eval_len": [37, 174, 165, 290, 276, 515, 863, 186, 447, 354]}

 64%|██████▍   | 639995/1000000 [5:09:32<1:57:46, 50.95it/s]global step 640000, trans_decision ep_re 1685.4596428965444

{"global_step": 640000, "eval_re": [1493.1742026922002, 1305.1088097321147, 
107.43091089031786, 4585.822246882813, 4590.250876786341, 1677.8104675333843, 
237.18624261744344, 2717.4308974752003, 1.9846748012582471, 138.39709955437075],
"eval_len": [371, 334, 66, 1000, 1000, 443, 191, 590, 22, 100]}

 65%|██████▍   | 649995/1000000 [5:13:44<1:54:54, 50.77it/s]global step 650000, trans_decision ep_re 1271.0900478019823

{"global_step": 650000, "eval_re": [1727.3519324895944, 3461.409905769205, 
1698.21901655777, 114.77247565195452, 1158.108409647154, 50.972931675515696, 
1230.2563750954819, 1116.540377032334, 724.2545814154796, 1429.014472685332], 
"eval_len": [543, 729, 414, 67, 297, 74, 315, 282, 211, 437]}

 66%|██████▌   | 659995/1000000 [5:18:22<1:51:31, 50.81it/s]global step 660000, trans_decision ep_re 1124.0788254025815

{"global_step": 660000, "eval_re": [497.77600351381176, 1293.3387790419097, 
1841.134058003946, 1228.0982889219817, 707.8281074123181, 1917.2690010391252, 
822.1861744574775, 303.4380996883781, 2609.983533817719, 19.736208129148764], 
"eval_len": [208, 336, 425, 306, 215, 478, 260, 120, 582, 31]}

 67%|██████▋   | 669995/1000000 [5:22:38<1:48:34, 50.66it/s]global step 670000, trans_decision ep_re 859.8450494295348

{"global_step": 670000, "eval_re": [584.7312494417164, 167.26929238803555, 
1214.4953472592779, 309.43170178376477, 1953.9272061651584, 1980.5943576418513, 
165.26142917206633, 1167.2911395244485, 1036.1277417855447, 19.321029133484], 
"eval_len": [224, 89, 307, 118, 430, 433, 103, 293, 263, 36]}

 68%|██████▊   | 679997/1000000 [5:27:12<1:44:58, 50.81it/s]global step 680000, trans_decision ep_re 1463.344921253853

{"global_step": 680000, "eval_re": [1296.612370385631, 3596.647923035263, 
4407.798309963271, 1077.2166137173917, 3406.1734063949984, 379.46500271534575, 
36.79666813204063, 142.01775790635796, 149.7660898648123, 140.95507042342112], 
"eval_len": [375, 775, 1000, 288, 817, 152, 56, 72, 105, 99]}

 69%|██████▉   | 689995/1000000 [5:31:42<1:41:58, 50.67it/s]global step 690000, trans_decision ep_re 1239.5013867698472

{"global_step": 690000, "eval_re": [19.501184318750237, 1785.1866926320552, 
3407.757220673221, 741.4167734964819, 21.372506973551282, 1477.717034732699, 
43.459456284553035, 2486.076922686306, 1757.8316889607643, 654.6943869400884], 
"eval_len": [32, 438, 730, 218, 44, 360, 51, 589, 452, 189]}

 70%|██████▉   | 699995/1000000 [5:35:56<1:38:25, 50.80it/s]global step 700000, trans_decision ep_re 1160.1049208177797

{"global_step": 700000, "eval_re": [547.9529979444874, 122.48382276343813, 
587.2092240165688, 46.488098315044915, 4812.694869735075, 507.076050120497, 
315.17847607537624, 124.18194369228915, 483.1374979186755, 4054.646227596345], 
"eval_len": [237, 67, 196, 98, 985, 164, 119, 116, 202, 1000]}

 71%|███████   | 709997/1000000 [5:40:23<1:35:02, 50.85it/s]global step 710000, trans_decision ep_re 1317.880983481578

{"global_step": 710000, "eval_re": [1217.7949256482889, 35.26144252318526, 
4714.2301283727365, 3341.944867190016, 51.896595612625234, 174.4851196851507, 
637.1547268661378, 34.74955443334083, 663.3466534006823, 2307.9458210836146], 
"eval_len": [338, 37, 1000, 736, 45, 107, 206, 45, 225, 518]}

 72%|███████▏  | 719999/1000000 [5:44:52<1:53:28, 41.13it/s]global step 720000, trans_decision ep_re 552.6917204348999

{"global_step": 720000, "eval_re": [1003.8117945088763, 1895.1560961613432, 
-2.6636069988437194, 34.750871478554934, 210.0446275703501, 302.4714221896108, 
27.13540954234631, 1202.3663832934017, 179.41515409880336, 674.4290525045575], 
"eval_len": [271, 436, 14, 47, 123, 115, 46, 332, 160, 207]}

 73%|███████▎  | 729997/1000000 [5:49:15<1:28:20, 50.94it/s]global step 730000, trans_decision ep_re 1173.6231694483163

{"global_step": 730000, "eval_re": [125.59516257657835, 3074.4559366778385, 
717.3910655185912, 3527.637086047903, 1214.6259475350248, 640.2172620845532, 
483.95625505985095, 115.43048154911719, 1032.8800445460793, 804.0424528876246], 
"eval_len": [78, 691, 219, 773, 300, 190, 152, 77, 265, 231]}

 74%|███████▍  | 739999/1000000 [5:53:53<1:24:59, 50.98it/s]global step 740000, trans_decision ep_re 634.384479864492

{"global_step": 740000, "eval_re": [387.92619204611947, 2088.8487772328976, 
790.6826812041023, 21.06818775407225, 101.53035802586957, 1193.0796100873195, 
390.2152180092276, 560.8406826287271, 782.1146882056609, 27.538403450924335], 
"eval_len": [152, 482, 236, 23, 115, 310, 163, 171, 247, 37]}

 75%|███████▍  | 749999/1000000 [5:58:07<1:21:57, 50.84it/s]global step 750000, trans_decision ep_re 437.9783421496827

{"global_step": 750000, "eval_re": [733.1744152357618, 531.0526833287889, 
75.64613148866519, 1229.1008307625539, 154.66697105478718, 116.63409162239294, 
3.062015168924749, 1472.0635410471027, 29.84147154145714, 34.54127024639187], 
"eval_len": [230, 170, 161, 307, 86, 64, 117, 335, 48, 47]}

 76%|███████▌  | 759995/1000000 [6:02:43<1:18:49, 50.75it/s]global step 760000, trans_decision ep_re 854.4662718816902

{"global_step": 760000, "eval_re": [563.1806892836387, 36.51284205331422, 
38.50652566773561, 59.69566899890402, 950.073724437911, 1046.7473579883826, 
933.3659770184483, 2240.8782359975685, 608.3825463892146, 2067.319150981784], 
"eval_len": [219, 57, 40, 59, 285, 292, 256, 505, 188, 503]}

 77%|███████▋  | 769995/1000000 [6:06:56<1:15:18, 50.91it/s]global step 770000, trans_decision ep_re 1107.5781239477428

{"global_step": 770000, "eval_re": [1219.2390184893231, 479.60081342083544, 
4951.366021725677, 342.9218729873762, 111.4244031641379, 2331.044560361254, 
250.57039596971012, 291.47737062195466, 380.25189086207155, 717.884891875089], 
"eval_len": [325, 179, 1000, 138, 67, 493, 129, 129, 158, 215]}

 78%|███████▊  | 779997/1000000 [6:11:33<1:12:13, 50.76it/s]global step 780000, trans_decision ep_re 1075.3492759671458

{"global_step": 780000, "eval_re": [155.4543082058221, 1210.5413157719004, 
1748.6438257702318, 494.43801438228377, 1078.6788348629655, 1050.3301377900673, 
10.606595925329158, 2849.8550336549283, 115.36289389171455, 2039.581799416216], 
"eval_len": [83, 335, 421, 170, 314, 286, 31, 584, 94, 483]}

 79%|███████▉  | 789995/1000000 [6:15:49<1:09:03, 50.68it/s]global step 790000, trans_decision ep_re 347.06296873934656

{"global_step": 790000, "eval_re": [150.89637422800055, 550.077203436837, 
93.7894469429439, 16.170685966411178, 132.07162180102472, 1309.4275657778323, 
535.5812435835999, 174.67082710234578, 28.416522919323437, 479.5281956351465], 
"eval_len": [96, 192, 68, 30, 67, 351, 236, 97, 42, 170]}

 80%|███████▉  | 799997/1000000 [6:20:23<1:05:30, 50.88it/s]global step 800000, trans_decision ep_re 1270.5243474605215

{"global_step": 800000, "eval_re": [3707.3816892242808, 234.2576473404625, 
714.9929030174537, 1273.4090276852955, 2665.2468891236963, 1014.2231656657981, 
432.7303800732098, 111.24373728495817, 874.3146560676203, 1677.4433791224405], 
"eval_len": [751, 118, 325, 312, 540, 267, 175, 69, 248, 400]}

 81%|████████  | 809995/1000000 [6:24:53<1:02:03, 51.02it/s]global step 810000, trans_decision ep_re 1261.9416201387023

{"global_step": 810000, "eval_re": [95.460707177638, 88.60915182616925, 
1061.088720375898, 2883.5561956739334, 2964.2949523592124, 1387.5178668829383, 
610.4132563535712, 41.91566453042237, 1173.5251868243645, 2313.034499382875], 
"eval_len": [82, 85, 333, 626, 610, 357, 201, 59, 290, 579]}

 82%|████████▏ | 819995/1000000 [6:29:07<59:13, 50.66it/s]global step 820000, trans_decision ep_re 744.7440799081362

{"global_step": 820000, "eval_re": [1039.9676569965527, 2589.9418363082177, 
701.7094029449107, 727.969165636877, 725.9456808760131, 50.73060395611349, 
429.25499329310725, 118.74288034046056, 337.05817384468264, 726.120404884428], 
"eval_len": [292, 532, 203, 217, 216, 44, 176, 71, 132, 205]}

 83%|████████▎ | 829995/1000000 [6:33:30<55:42, 50.87it/s]global step 830000, trans_decision ep_re 668.1032854335942

{"global_step": 830000, "eval_re": [2321.5429143134784, 388.39653456208225, 
287.368907227758, 256.55227080018824, 307.86888331003354, 16.6173908294826, 
610.4882305737555, 1962.1971535597681, 133.1618545152057, 396.83871464418905], 
"eval_len": [519, 146, 151, 129, 136, 27, 208, 467, 76, 176]}

 84%|████████▍ | 839995/1000000 [6:37:54<52:14, 51.04it/s]global step 840000, trans_decision ep_re 241.10842797791352

{"global_step": 840000, "eval_re": [109.44796998237786, 126.80771177743104, 
340.16328380292737, 133.9087424118449, 134.1857159930994, 276.36085769917247, 
289.4538392647814, 279.2291641325025, 600.0548713281571, 121.47212338684095], 
"eval_len": [67, 127, 129, 75, 86, 124, 142, 129, 207, 85]}

 85%|████████▍ | 849997/1000000 [6:42:16<49:11, 50.82it/s]global step 850000, trans_decision ep_re 461.15270715623694

{"global_step": 850000, "eval_re": [121.72035511330198, 144.25402342645663, 
35.49842755109181, 161.26231126634377, 1924.9433434737193, 167.9619560136461, 
580.6365485485502, 216.92298310865627, 657.1045942928217, 601.2225287677812], 
"eval_len": [78, 74, 44, 132, 432, 141, 183, 96, 186, 227]}

 86%|████████▌ | 859999/1000000 [6:46:38<45:54, 50.82it/s]global step 860000, trans_decision ep_re 732.5284156545422

{"global_step": 860000, "eval_re": [35.26867762660021, 265.6131706146785, 
1091.3121341222131, 100.83056040900826, 2134.574443617665, 1290.4470027815964, 
797.8763509308886, 52.4936586752912, 1387.7543789283982, 169.11377883908122], 
"eval_len": [52, 122, 324, 61, 540, 330, 260, 54, 375, 118]}

 87%|████████▋ | 869995/1000000 [6:51:13<42:22, 51.14it/s]global step 870000, trans_decision ep_re 831.1109376377956

{"global_step": 870000, "eval_re": [255.48975787939128, 1092.4253488499953, 
1130.579396613004, 81.41142165792563, 173.7036354122418, 1486.4017478365074, 
1899.2422042329047, 30.581572947128652, 1176.8729237616672, 984.4013671871907], 
"eval_len": [132, 288, 300, 149, 106, 346, 427, 37, 289, 252]}

 88%|████████▊ | 879999/1000000 [6:55:28<39:23, 50.77it/s]global step 880000, trans_decision ep_re 612.1566998499902

{"global_step": 880000, "eval_re": [147.80318838264856, 427.55106877320236, 
2886.7126373071756, 1555.928146034255, 13.96135371126494, 520.8570925304049, 
151.30692967720634, 268.55782619456767, 154.26176620996225, -5.373010320785462],
"eval_len": [79, 153, 711, 379, 26, 161, 81, 112, 82, 15]}

 89%|████████▉ | 889999/1000000 [7:00:04<36:11, 50.65it/s]global step 890000, trans_decision ep_re 615.1656904316967

{"global_step": 890000, "eval_re": [511.1421246694242, 167.4764479315137, 
1354.3067087033455, 416.4611883752613, 1423.1076834626951, 503.5167667317338, 
629.007197023317, 480.5405428414683, 492.33472117737165, 173.7635234008356], 
"eval_len": [185, 89, 328, 140, 332, 159, 189, 159, 184, 99]}

 90%|████████▉ | 899999/1000000 [7:04:18<32:56, 50.59it/s]global step 900000, trans_decision ep_re 441.7945250427956

{"global_step": 900000, "eval_re": [125.80442865212711, 13.676964708149185, 
562.0312647812841, 10.905403761169298, 14.57359905237984, 108.47630853435086, 
1160.2935985558765, 1376.0573590296676, 258.61150308852405, 787.5148202644277], 
"eval_len": [85, 20, 170, 19, 20, 66, 297, 332, 99, 235]}

 91%|█████████ | 909999/1000000 [7:08:40<29:26, 50.96it/s]global step 910000, trans_decision ep_re 65.05074752213595

{"global_step": 910000, "eval_re": [14.591224220340603, 15.980242521679443, 
16.75465473991597, 32.206099178419535, -2.7421263695937936, 108.31419251155006, 
18.04365562031932, 14.258155364194467, 308.97382464223705, 124.12755279229687], 
"eval_len": [22, 22, 22, 33, 13, 67, 25, 21, 133, 73]}

 92%|█████████▏| 919995/1000000 [7:12:59<26:24, 50.51it/s]global step 920000, trans_decision ep_re 469.0397611431172

{"global_step": 920000, "eval_re": [55.42862073747469, 3476.9841866995234, 
139.64540501308292, 210.07140655945298, 182.865756715618, 36.819132199930465, 
162.81404479722872, 155.05379192646154, 127.95116361756187, 142.7641031648377], 
"eval_len": [74, 778, 81, 110, 119, 32, 97, 83, 75, 81]}

 93%|█████████▎| 929995/1000000 [7:17:23<23:00, 50.70it/s]global step 930000, trans_decision ep_re 191.5577285668681

{"global_step": 930000, "eval_re": [103.79176404996858, 81.47828583146172, 
38.50954704895293, 97.37601852492578, 96.72461959185108, 39.78875124395447, 
36.25656055226506, 24.286702605251392, 1293.9656886491127, 103.39934757093714], 
"eval_len": [86, 75, 40, 82, 73, 63, 59, 43, 388, 65]}

 94%|█████████▍| 939997/1000000 [7:21:45<19:56, 50.15it/s]global step 940000, trans_decision ep_re 927.0963694039672

{"global_step": 940000, "eval_re": [95.47933754714104, 752.2352747972858, 
56.421620542288046, 4735.789258404454, 143.78478583435896, 109.48529206589753, 
26.480456317819506, 375.7845011081354, 108.81855857676102, 2866.6846088455304], 
"eval_len": [67, 220, 70, 1000, 78, 68, 26, 187, 76, 626]}

 95%|█████████▍| 949997/1000000 [7:26:10<16:20, 51.02it/s]global step 950000, trans_decision ep_re 764.8705655554915

{"global_step": 950000, "eval_re": [513.0247756702139, 131.52737360077748, 
250.74109415301052, 1652.1716908283274, 3453.4812926919694, 118.39212955638939, 
963.1355392731779, 106.01664480465955, 253.67148431257112, 206.54363066381705], 
"eval_len": [190, 77, 166, 414, 824, 79, 261, 72, 123, 120]}

 96%|█████████▌| 959995/1000000 [7:30:37<13:20, 49.97it/s]global step 960000, trans_decision ep_re 624.3399553262709

{"global_step": 960000, "eval_re": [415.5098684331671, 531.5667500450086, 
987.0275574625684, 2154.226024446398, 565.5019997552549, 560.2176533600139, 
122.83034971899929, 139.35718214250727, 598.5916261042012, 168.57054179459084], 
"eval_len": [154, 176, 257, 522, 179, 167, 77, 77, 173, 101]}

 97%|█████████▋| 969999/1000000 [7:35:05<09:54, 50.43it/s]global step 970000, trans_decision ep_re 483.4110871791871

{"global_step": 970000, "eval_re": [145.86656603798565, 69.07542595467287, 
1934.362120210501, 1338.4865841481578, 112.83349245398426, 540.3513765671124, 
134.55082052139625, 94.76731825127193, 14.108910831276889, 449.70825681551236], 
"eval_len": [120, 77, 430, 369, 79, 210, 79, 77, 25, 205]}

 98%|█████████▊| 979999/1000000 [7:39:44<06:37, 50.34it/s]global step 980000, trans_decision ep_re 1120.8625046577943

{"global_step": 980000, "eval_re": [4318.749934072403, 23.64517798011223, 
1525.393426810154, 474.27416892929295, 1202.6623409238964, 23.27746745643924, 
603.1562138340179, 2062.194798895419, 837.7463223938522, 137.52519528235624], 
"eval_len": [888, 24, 379, 167, 307, 37, 186, 446, 248, 102]}

 99%|█████████▉| 989999/1000000 [7:44:00<03:17, 50.55it/s]global step 990000, trans_decision ep_re 757.7271023492916

{"global_step": 990000, "eval_re": [1298.3734656058275, 105.67319428429465, 
100.40240646179174, 104.58980716160207, 18.024333089186975, 324.1618665169291, 
675.5603017912372, 147.81176019526785, 122.60662298381706, 4680.067265402962], 
"eval_len": [310, 67, 61, 110, 22, 134, 195, 92, 80, 1000]}

100%|█████████▉| 999999/1000000 [7:48:27<00:00, 50.51it/s]global step 1000000, trans_decision ep_re 605.9500424953845

{"global_step": 1000000, "eval_re": [90.92966425385762, 3371.3516241711673, 
645.2384091271114, 107.05306658386003, 594.348251713844, 521.1376392514143, 
106.58312422515131, 25.396198084941386, 23.90105169611904, 573.5613958463792], 
"eval_len": [69, 681, 182, 67, 181, 175, 91, 49, 40, 184]}

100%|██████████| 1000000/1000000 [7:48:37<00:00, 35.56it/s]
