
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 4,
    'delayspec': 'ExtremeSparseL4U32::markov(4, 32, [[249, 1], [1, 31]])'
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [02:21<5:39:47, 48.56it/s]global step 10000, trans_decision ep_re 232.3555526270894

{"global_step": 10000, "eval_re": [39.828167844151125, 271.40292957882855, 
293.6962859430858, 430.76388791471135, 74.03566104382301, 315.53683949232357, 
75.2640212750375, 327.0694076246462, 250.8169837668841, 245.14134178740255], 
"eval_len": [145, 150, 177, 317, 229, 199, 100, 226, 132, 134]}

  2%|▏         | 19999/1000000 [06:55<5:32:02, 49.19it/s]global step 20000, trans_decision ep_re 139.5297555892195

{"global_step": 20000, "eval_re": [183.73998269069992, 207.58591272825856, 
207.06506261194096, 194.73994922435318, 173.33123366216404, 27.705494693129996, 
57.14991573937533, 164.73954874820896, 32.0875095062061, 147.15294628785801], 
"eval_len": [110, 136, 117, 114, 124, 52, 74, 128, 53, 85]}

  3%|▎         | 29999/1000000 [11:26<5:26:05, 49.58it/s]global step 30000, trans_decision ep_re 201.72548686005

{"global_step": 30000, "eval_re": [267.42970967542436, 173.74090427789108, 
220.67504325630827, 208.9548083965934, 242.4735034090889, 228.5922680718045, 
213.8946524664814, 138.18050829348624, 320.06565868610824, 3.247812067313518], 
"eval_len": [134, 96, 113, 109, 122, 140, 133, 94, 164, 12]}

  4%|▍         | 39997/1000000 [15:54<5:17:32, 50.39it/s]global step 40000, trans_decision ep_re 221.0941240461816

{"global_step": 40000, "eval_re": [12.589237098253049, 351.927086084207, 
280.3979367987337, 240.19050572419536, 445.4774731379246, 349.342573502688, 
34.7107508096134, 40.387512469962196, 286.67994893821253, 169.23821589802637], 
"eval_len": [23, 183, 135, 124, 274, 211, 38, 48, 147, 126]}

  5%|▍         | 49997/1000000 [20:40<5:16:51, 49.97it/s]global step 50000, trans_decision ep_re 762.9587477603251

{"global_step": 50000, "eval_re": [1054.3340308243214, 1860.7934718470576, 
788.7135229221103, 944.9208602006747, 521.6970799226481, 687.4589114899552, 
341.0099299864046, 935.5345270212354, 476.82805922034237, 18.2970841685002], 
"eval_len": [452, 1000, 392, 548, 310, 404, 340, 501, 255, 41]}

  6%|▌         | 59999/1000000 [25:10<5:13:11, 50.02it/s]global step 60000, trans_decision ep_re 314.38368000242593

{"global_step": 60000, "eval_re": [473.8085886697452, 249.3752801718214, 
510.8846859939876, 380.9352358963375, 324.34985126218044, 687.3657747474106, 
7.790337637653107, 435.75992403347357, 51.343343218151325, 22.223778393499053], 
"eval_len": [220, 146, 242, 192, 164, 343, 20, 220, 53, 33]}

  7%|▋         | 69997/1000000 [29:30<5:09:28, 50.09it/s]global step 70000, trans_decision ep_re 348.3925965552584

{"global_step": 70000, "eval_re": [426.4928452156417, 441.90626865587393, 
522.9289242441456, 19.442568401887854, 519.6565160567961, 381.1154031430438, 
17.12455841020764, 352.5770568723918, 520.4182075955212, 282.26361695707476], 
"eval_len": [218, 180, 263, 31, 219, 238, 31, 181, 252, 253]}

  8%|▊         | 79995/1000000 [34:02<5:06:28, 50.03it/s]global step 80000, trans_decision ep_re 713.9013057808609

{"global_step": 80000, "eval_re": [957.7769952544194, 179.53549813043526, 
759.8193267127272, 372.9213002665616, 756.9760005062421, 357.9191568848133, 
649.7976138561393, 902.2163561762379, 883.2798709642732, 1318.7709390567609], 
"eval_len": [498, 94, 387, 175, 327, 188, 429, 470, 367, 587]}

  9%|▉         | 89995/1000000 [38:35<5:04:12, 49.86it/s]global step 90000, trans_decision ep_re 687.9561717910537

{"global_step": 90000, "eval_re": [64.30337362624115, 1374.209394145701, 
84.6062743811417, 1098.735173739142, 54.32160378567997, 1689.7464436538926, 
97.27263025791831, 1510.9011204312087, 54.886771388819675, 850.5789325007917], 
"eval_len": [65, 528, 88, 446, 58, 684, 116, 664, 59, 377]}

 10%|▉         | 99999/1000000 [43:20<5:02:59, 49.51it/s]global step 100000, trans_decision ep_re 807.4234189252899

{"global_step": 100000, "eval_re": [1287.8538023612282, 872.6468752700961, 
1415.5535835518954, 399.6207742626676, -0.7957833844679248, 218.14936950500066, 
294.84746001004146, 1990.2777782029973, 1411.9242923521867, 184.1560371212522], 
"eval_len": [601, 463, 555, 215, 16, 138, 185, 967, 577, 98]}

 11%|█         | 109995/1000000 [47:45<5:00:18, 49.39it/s]global step 110000, trans_decision ep_re 781.6815399327521

{"global_step": 110000, "eval_re": [466.3096657505224, 2444.0377423727273, 
313.2192673757179, 157.157998219887, 336.3914657246287, 2552.5824592363833, 
-0.07152250861941889, 1357.4460990969053, 189.93036144532806, 
-0.188137385960502], "eval_len": [286, 1000, 162, 139, 228, 1000, 19, 508, 143, 
18]}

 12%|█▏        | 119999/1000000 [52:21<4:52:07, 50.21it/s]global step 120000, trans_decision ep_re 368.21664944417887

{"global_step": 120000, "eval_re": [212.79439610323524, 886.6237314979408, 
730.0847905686659, 951.06153067221, 0.34385490664019325, 28.864099231162335, 
546.2837856056461, 29.2396115320177, 50.4154246016439, 246.45526972262664], 
"eval_len": [124, 330, 248, 334, 13, 48, 253, 39, 58, 136]}

 13%|█▎        | 129999/1000000 [56:51<4:51:02, 49.82it/s]global step 130000, trans_decision ep_re 1042.8580361524375

{"global_step": 130000, "eval_re": [1548.569869375599, 536.7400654647925, 
214.036913734792, 763.0255034378968, 961.9434163231953, 2058.3795054416587, 
950.0440335411405, 1431.795452588211, 1643.9267820738503, 320.11881954324105], 
"eval_len": [521, 199, 142, 330, 398, 1000, 322, 522, 536, 141]}

 14%|█▍        | 139997/1000000 [1:01:33<5:00:25, 47.71it/s]global step 140000, trans_decision ep_re 1102.5272983089478

{"global_step": 140000, "eval_re": [1936.9011252819769, 1402.200166755546, 
2434.7971927182552, 470.05656960057235, 23.31875016376376, 1002.4735395241546, 
15.397820308530578, 583.1329905886336, 1584.269730221346, 1572.7250979266998], 
"eval_len": [630, 459, 802, 199, 33, 348, 28, 281, 529, 559]}

 15%|█▍        | 149995/1000000 [1:06:32<4:43:01, 50.05it/s]global step 150000, trans_decision ep_re 1578.0988457504948

{"global_step": 150000, "eval_re": [3027.4173972693607, 2887.0021904458354, 
15.977552046332322, 2404.272495965608, 565.8573915914902, 2852.8508732995047, 
2458.856762634303, 217.96142687563372, 1106.0992832455345, 244.69308413134632], 
"eval_len": [1000, 1000, 26, 724, 226, 843, 746, 160, 370, 118]}

 16%|█▌        | 159996/1000000 [1:11:12<4:43:48, 49.33it/s]global step 160000, trans_decision ep_re 1627.532655275051

{"global_step": 160000, "eval_re": [2216.0600104487808, 1443.19034759412, 
2857.330165099148, 2070.9395143540614, 2642.2031819981516, 773.1734786624618, 
28.813589926000112, 37.49278773617227, 3288.2144276215495, 917.9090493100651], 
"eval_len": [751, 455, 831, 632, 1000, 269, 45, 61, 1000, 311]}

 17%|█▋        | 169995/1000000 [1:15:52<7:07:20, 32.37it/s]global step 170000, trans_decision ep_re 1764.5726454691637

{"global_step": 170000, "eval_re": [149.17489640475895, 29.97579935106739, 
741.0867740769753, 3366.4861754988174, 3484.915404644096, 3216.48094489074, 
450.16920535083614, 3465.5407799935238, 2092.6607901815896, 649.2356842992349], 
"eval_len": [155, 42, 235, 1000, 1000, 1000, 175, 1000, 615, 313]}

 18%|█▊        | 179997/1000000 [1:20:32<4:33:22, 49.99it/s]global step 180000, trans_decision ep_re 1189.7921919585892

{"global_step": 180000, "eval_re": [457.1682243387868, 863.7564189863192, 
3147.6129105641476, 803.6885409103528, 1047.42563655818, 543.7672045476357, 
467.7574785159905, 520.5234056047209, 684.9156701886096, 3361.3064293711504], 
"eval_len": [198, 322, 1000, 299, 369, 215, 178, 211, 307, 1000]}

 19%|█▉        | 189995/1000000 [1:25:12<4:30:54, 49.83it/s]global step 190000, trans_decision ep_re 1540.41097613787

{"global_step": 190000, "eval_re": [1629.841941149685, 309.98413947333717, 
3350.5109342144274, 698.5020220526334, 3160.2219924693686, 1217.9486183483398, 
463.7375502784339, 1157.5030270766206, 1693.2217072874328, 1722.6378290284208], 
"eval_len": [457, 126, 1000, 266, 1000, 404, 211, 393, 541, 538]}

 20%|█▉        | 199997/1000000 [1:29:52<4:27:36, 49.82it/s]global step 200000, trans_decision ep_re 1879.421785422474

{"global_step": 200000, "eval_re": [3449.8820643765525, 577.3464559998379, 
529.3139410685109, 1185.2614166520432, 2707.184054432417, 2987.1229218234134, 
-0.10182660959084222, 3395.2834465726264, 3459.300513147456, 
503.62486676146966], "eval_len": [1000, 206, 230, 379, 829, 851, 14, 1000, 1000,
199]}

 21%|██        | 209999/1000000 [1:34:32<4:23:47, 49.91it/s]global step 210000, trans_decision ep_re 1469.1021622511928

{"global_step": 210000, "eval_re": [604.3734491703244, 1652.3481711394288, 
2756.8639384039507, 1206.4630249605182, 1420.8849250678738, 3232.916925861687, 
1007.1718419306309, 10.423771902610481, 768.459053529002, 2031.1165205459008], 
"eval_len": [225, 762, 798, 399, 443, 1000, 316, 22, 264, 620]}

 22%|██▏       | 219997/1000000 [1:39:02<4:19:40, 50.06it/s]global step 220000, trans_decision ep_re 2199.2903404665994

{"global_step": 220000, "eval_re": [3356.7060967418074, 3515.613845139332, 
1829.6708070123661, 2473.133036351943, 43.187955059206615, 1230.2513440231617, 
1683.789064986273, 3003.514970809503, 2323.0941780248722, 2533.9421065175316], 
"eval_len": [958, 1000, 535, 730, 65, 540, 533, 840, 700, 798]}

 23%|██▎       | 229999/1000000 [1:43:32<4:19:18, 49.49it/s]global step 230000, trans_decision ep_re 1481.302521681629

{"global_step": 230000, "eval_re": [3588.6163335790006, 12.318024236421376, 
27.497002446935127, 2548.9875426500234, 3592.4392053958454, 604.4431759630293, 
3711.3953825094272, 667.7182263690898, 30.98755775593701, 28.622765910579048], 
"eval_len": [1000, 34, 45, 715, 1000, 253, 1000, 238, 52, 40]}

 24%|██▍       | 239995/1000000 [1:48:22<4:14:19, 49.81it/s]global step 240000, trans_decision ep_re 1161.520581714398

{"global_step": 240000, "eval_re": [1852.1209356798279, 32.34488351110935, 
1617.5186800557146, 735.2798769521065, 8.17983844728142, 1160.2687048369755, 
18.76501441399637, 3164.79758568454, 437.78432146455265, 2588.1459760978764], 
"eval_len": [624, 55, 522, 278, 19, 386, 34, 889, 179, 836]}

 25%|██▍       | 249995/1000000 [1:53:02<4:12:12, 49.56it/s]global step 250000, trans_decision ep_re 1621.6806409183462

{"global_step": 250000, "eval_re": [3630.5477195577687, 323.5039029057524, 
3048.3491793637786, 1061.4772329565633, 3604.953990572504, 1691.86047395525, 
314.3157092004048, 1135.5625835114017, 1343.5425957591428, 62.69302140089523], 
"eval_len": [1000, 152, 1000, 323, 1000, 522, 151, 368, 417, 66]}

 26%|██▌       | 259999/1000000 [1:57:25<4:09:13, 49.49it/s]global step 260000, trans_decision ep_re 1256.2098591723914

{"global_step": 260000, "eval_re": [2933.6174255848, 3681.3322226386076, 
180.91218173877547, 1976.77431524201, 29.31579184440875, 29.074149895172688, 
25.7563387391162, 2848.7013601812364, 63.495041760276635, 793.1197640995118], 
"eval_len": [801, 1000, 96, 548, 46, 50, 37, 855, 79, 269]}

 27%|██▋       | 269997/1000000 [2:02:23<4:04:24, 49.78it/s]global step 270000, trans_decision ep_re 1856.2892110987864

{"global_step": 270000, "eval_re": [3190.4396000752345, 3750.0854360831486, 
259.93836601125207, 10.774044831223861, 3332.1501520723805, 3557.10420357267, 
3463.138423819454, 771.6288500668196, 200.9355255531656, 26.69750890251308], 
"eval_len": [1000, 1000, 127, 25, 878, 998, 1000, 353, 102, 51]}

 28%|██▊       | 279999/1000000 [2:07:13<3:58:44, 50.26it/s]global step 280000, trans_decision ep_re 1625.9857156470198

{"global_step": 280000, "eval_re": [2090.3365475695864, 1549.7039380610563, 
532.3098062454219, 1034.9583692811868, 3826.286149460506, 3739.8920749817657, 
1797.7633711234514, 227.21176887718872, 1447.8791530174667, 13.515977852565673],
"eval_len": [573, 472, 200, 317, 1000, 946, 540, 116, 434, 27]}

 29%|██▉       | 289999/1000000 [2:11:53<4:15:15, 46.36it/s]global step 290000, trans_decision ep_re 1351.862724203326

{"global_step": 290000, "eval_re": [526.7501596822385, 1304.942278016767, 
1131.2420503327457, 24.937416607820058, 1500.591714526816, 1934.6267380645975, 
1811.0059457718, 2289.4025244088125, 1575.476866174849, 1419.6515484468143], 
"eval_len": [218, 404, 398, 42, 459, 556, 510, 670, 474, 422]}

 30%|██▉       | 299999/1000000 [2:16:33<3:53:12, 50.03it/s]global step 300000, trans_decision ep_re 1707.7509285902186

{"global_step": 300000, "eval_re": [3740.5338840536565, 668.6958920474459, 
-1.235879833802974, 23.58604674202361, 3648.482328457735, 180.61816723264752, 
3328.8713683543815, 614.2953920196064, 1590.3419895772624, 3283.320097251228], 
"eval_len": [960, 266, 17, 39, 1000, 90, 912, 225, 461, 1000]}

 31%|███       | 309999/1000000 [2:21:13<3:50:01, 50.00it/s]global step 310000, trans_decision ep_re 1459.362057533921

{"global_step": 310000, "eval_re": [3580.483877610313, 495.48775341045496, 
30.406062253391507, 3078.7381236026763, 21.376601698493147, 12.148495033384924, 
26.128895133793844, 3578.0851913483953, 1711.4953418003215, 2059.270233447983], 
"eval_len": [1000, 201, 52, 835, 40, 24, 44, 973, 495, 558]}

 32%|███▏      | 319998/1000000 [2:25:37<5:59:50, 31.50it/s]global step 320000, trans_decision ep_re 486.7494982541366

{"global_step": 320000, "eval_re": [191.56930546770792, 9.338235811642107, 
6.770772879311826, 20.517940587860533, 24.201292539702564, 2739.9977304541185, 
28.566511560153053, 690.4241528291401, 7.892617803721043, 1148.2164226080085], 
"eval_len": [97, 20, 17, 34, 50, 731, 41, 219, 19, 357]}

 33%|███▎      | 329999/1000000 [2:30:23<3:45:57, 49.42it/s]global step 330000, trans_decision ep_re 1467.205345250058

{"global_step": 330000, "eval_re": [2504.3335517498613, 2040.492772038972, 
3707.7870675606937, 196.00766295751498, 437.3125140274453, 1564.3949586675471, 
1192.9344127998322, 2781.197984838103, 39.40535532066221, 208.18717253994933], 
"eval_len": [690, 595, 1000, 271, 214, 466, 357, 760, 64, 103]}

 34%|███▍      | 339997/1000000 [2:34:43<3:40:20, 49.92it/s]global step 340000, trans_decision ep_re 139.10342244834962

{"global_step": 340000, "eval_re": [5.253748976061175, 1353.3684261435333, 
4.054133180325533, 3.5300574165821397, 5.306919940066694, 3.763291640529238, 
3.6886480021281165, 3.494236291975771, 4.555333756668719, 4.019429135625547], 
"eval_len": [19, 400, 18, 18, 19, 18, 18, 18, 18, 19]}

 35%|███▍      | 349995/1000000 [2:39:24<3:37:00, 49.92it/s]global step 350000, trans_decision ep_re 1276.724838053684

{"global_step": 350000, "eval_re": [36.983537738365335, 1355.4950072288832, 
44.888370692203786, 2482.9162918302427, 1783.3223002084246, 1355.4513248421172, 
1658.5289529429779, 734.2628639341208, 169.17286271173333, 3146.226868407772], 
"eval_len": [36, 382, 63, 663, 463, 409, 460, 245, 110, 847]}

 36%|███▌      | 359995/1000000 [2:43:54<3:33:32, 49.95it/s]global step 360000, trans_decision ep_re 1875.071707711806

{"global_step": 360000, "eval_re": [831.8703075500107, 4123.783698562408, 
2567.9436139249497, 39.221030360822525, 1012.9818360309312, 1452.6435559062459, 
1409.8290211292442, 2213.733577047773, 3432.622925818392, 1666.0875107872855], 
"eval_len": [265, 1000, 702, 67, 306, 412, 394, 552, 850, 453]}

 37%|███▋      | 369995/1000000 [2:48:34<3:31:03, 49.75it/s]global step 370000, trans_decision ep_re 1287.9777630949868

{"global_step": 370000, "eval_re": [271.63380118752616, 1326.730098109334, 
3248.8620185667914, 1907.1340654363396, 1078.4502687171366, 363.13249868343405, 
51.39435610548387, 415.0105178473529, 4195.540635900918, 21.88937039555234], 
"eval_len": [131, 368, 791, 476, 320, 190, 51, 209, 980, 35]}

 38%|███▊      | 379999/1000000 [2:52:54<3:26:25, 50.06it/s]global step 380000, trans_decision ep_re 1257.8737409563794

{"global_step": 380000, "eval_re": [1203.8361587667114, 1138.1464801017262, 
133.3352775396572, 611.6204901769572, 861.157790638012, 469.4944556902928, 
178.02720663003797, 3770.0563190601542, 1615.5938721394386, 2597.4693588208065],
"eval_len": [348, 365, 78, 381, 280, 209, 118, 1000, 452, 738]}

 39%|███▉      | 389997/1000000 [2:57:44<3:23:33, 49.94it/s]global step 390000, trans_decision ep_re 1233.5873358917202

{"global_step": 390000, "eval_re": [1623.746029914802, 1563.3215923063688, 
1467.910701693282, 40.6187567891062, 1097.4329360560073, 1674.9763227862065, 
18.252709319591066, 241.38549848142173, 1005.1604442803584, 3603.068367290058], 
"eval_len": [418, 399, 393, 56, 341, 432, 31, 129, 326, 866]}

 40%|███▉      | 399999/1000000 [3:02:14<3:21:36, 49.60it/s]global step 400000, trans_decision ep_re 999.9467554974183

{"global_step": 400000, "eval_re": [101.4832049344041, 169.2274585544392, 
102.81561381495945, 100.7941166158744, 2502.373271668639, 31.037327591767294, 
73.56985880645216, 3154.79942454922, 2067.9552243743, 1695.4120540641275], 
"eval_len": [66, 86, 70, 59, 652, 44, 84, 907, 568, 446]}

 41%|████      | 409997/1000000 [3:06:35<3:18:21, 49.57it/s]global step 410000, trans_decision ep_re 269.86112440190925

{"global_step": 410000, "eval_re": [25.732381624500196, 1665.0042041052593, 
25.95141366440209, 26.112784041736006, -5.046405098028911, 112.00191229223512, 
38.425566210781, 355.27333499198534, 35.9242667155367, 419.23178547068517], 
"eval_len": [46, 491, 38, 46, 131, 75, 59, 160, 63, 155]}

 42%|████▏     | 419999/1000000 [3:11:14<3:14:09, 49.79it/s]global step 420000, trans_decision ep_re 838.3371075722309

{"global_step": 420000, "eval_re": [1854.4870764811817, 189.69981087050414, 
797.6069917873159, 1037.9392956320414, 304.74926638626965, 2213.5171783852606, 
152.2049832701402, 16.1488907307831, 1642.029936619341, 174.98764555947048], 
"eval_len": [490, 100, 256, 336, 159, 614, 95, 25, 484, 95]}

 43%|████▎     | 429995/1000000 [3:15:44<3:09:32, 50.12it/s]global step 430000, trans_decision ep_re 1246.1724624795072

{"global_step": 430000, "eval_re": [710.2419310607143, -0.3735680855795745, 
758.0945701677309, 2000.139740055914, 4223.986465943964, 1449.649300697722, 
33.54127567449821, 44.6077988087076, 3092.3398395273202, 149.4972709440802], 
"eval_len": [236, 15, 267, 508, 1000, 425, 58, 55, 764, 82]}

 44%|████▍     | 439995/1000000 [3:20:08<3:09:08, 49.35it/s]global step 440000, trans_decision ep_re 1090.6331540074582

{"global_step": 440000, "eval_re": [124.2332028898517, 10.350489413627004, 
654.936672146133, 3475.7813726107943, 29.80608154828025, 2737.9552053551333, 
1957.7147420317565, 1704.8249953607078, 92.8600827946617, 117.86869592363713], 
"eval_len": [79, 21, 299, 795, 143, 646, 541, 467, 68, 69]}

 45%|████▍     | 449995/1000000 [3:24:40<3:04:06, 49.79it/s]global step 450000, trans_decision ep_re 332.536146963379

{"global_step": 450000, "eval_re": [1186.163584119878, 19.878888697661584, 
23.623577670299852, 24.30980470462196, 1187.0991187566165, 5.021106841831228, 
293.0293444663927, 114.59233568551967, 114.69023036854135, 356.9534783224269], 
"eval_len": [322, 33, 43, 136, 339, 18, 150, 69, 67, 148]}

 46%|████▌     | 459995/1000000 [3:29:07<3:02:29, 49.32it/s]global step 460000, trans_decision ep_re 790.3437044398561

{"global_step": 460000, "eval_re": [14.976846122061927, 1007.9128045725171, 
4325.140651995722, 327.6716343894688, 449.39336314643106, 41.79549196778223, 
708.625826152227, 8.516974918274975, 1008.680398119083, 10.723053014992962], 
"eval_len": [26, 359, 992, 161, 171, 58, 237, 19, 307, 20]}

 47%|████▋     | 469995/1000000 [3:33:36<2:57:19, 49.81it/s]global step 470000, trans_decision ep_re 567.2459013051638

{"global_step": 470000, "eval_re": [249.00734159358504, 1403.6642112198585, 
2170.760960797987, 100.32656698863869, 125.38984240905164, 36.16946173407805, 
163.47497561239786, 3.2487494307196823, 1308.4053474579339, 112.01155580738721],
"eval_len": [111, 434, 549, 66, 81, 167, 221, 20, 351, 74]}

 48%|████▊     | 479995/1000000 [3:38:05<2:53:14, 50.03it/s]global step 480000, trans_decision ep_re 1168.784056343415

{"global_step": 480000, "eval_re": [2406.9545020072496, 404.6737941920609, 
1799.423448574497, 2339.6464957772914, 132.7031214618591, 148.9870821611519, 
436.82512116601697, 3913.246906058858, 24.919175170312354, 80.46091686485144], 
"eval_len": [610, 191, 446, 565, 77, 77, 184, 891, 38, 152]}

 49%|████▉     | 489999/1000000 [3:42:37<2:50:04, 49.98it/s]global step 490000, trans_decision ep_re 567.9642348363576

{"global_step": 490000, "eval_re": [309.33443277123234, 1312.2955167980624, 
649.9777596854445, 757.3467298212526, 88.5752659459843, 98.40971350663523, 
145.1204674947654, 505.4202376477412, 1788.8135564442346, 24.34866824822362], 
"eval_len": [130, 353, 215, 235, 76, 56, 80, 190, 452, 41]}

 50%|████▉     | 499999/1000000 [3:47:06<2:45:24, 50.38it/s]global step 500000, trans_decision ep_re 1261.759346202715

{"global_step": 500000, "eval_re": [839.3252288816849, 1003.2043302724708, 
2517.317826130524, 1974.3134283413656, 823.7299597795429, 844.3548913171358, 
8.199970575512403, 847.4171570054386, 408.0384577010272, 3351.692212022449], 
"eval_len": [322, 308, 652, 604, 269, 256, 22, 259, 157, 801]}

 51%|█████     | 509995/1000000 [3:51:54<2:43:18, 50.01it/s]global step 510000, trans_decision ep_re 1578.7209113898293

{"global_step": 510000, "eval_re": [255.21650868849665, 4.999372909670775, 
788.527803369426, 1254.8916370654852, 4222.127671205272, 466.56710646811825, 
2492.68108489281, 4427.142627570514, 384.96421738018245, 1490.09108434832], 
"eval_len": [140, 48, 269, 361, 938, 187, 609, 1000, 185, 411]}

 52%|█████▏    | 519995/1000000 [3:56:15<2:40:29, 49.85it/s]global step 520000, trans_decision ep_re 967.5191263716594

{"global_step": 520000, "eval_re": [990.0428548803824, 429.2923575848257, 
973.2784452764947, 26.80689690780107, 2122.0238758604764, 16.54445618613527, 
1686.34687615201, 755.2647990608526, 1844.4783718550832, 831.1123299525312], 
"eval_len": [435, 158, 311, 42, 535, 27, 433, 248, 464, 267]}

 53%|█████▎    | 529999/1000000 [4:00:46<2:38:41, 49.36it/s]global step 530000, trans_decision ep_re 1360.5308869238681

{"global_step": 530000, "eval_re": [1376.6552236972836, 55.14199121084215, 
1255.4136991073335, 3673.250288023093, 60.20893518978087, 1333.8120824237565, 
1332.9153935750571, 1397.4896858952438, 1389.2586896423174, 1731.1628804739712],
"eval_len": [361, 75, 348, 827, 139, 352, 503, 384, 425, 430]}

 54%|█████▍    | 539995/1000000 [4:05:19<2:33:27, 49.96it/s]global step 540000, trans_decision ep_re 690.4401011332618

{"global_step": 540000, "eval_re": [432.4876055805583, 84.6856497581551, 
6.253355748156556, 70.38742935604907, 795.44800291544, 674.7532409652055, 
977.2008765861883, 32.39377883720438, 2946.683176495864, 884.1078950897963], 
"eval_len": [167, 171, 187, 70, 275, 216, 282, 46, 676, 258]}

 55%|█████▍    | 549995/1000000 [4:09:50<2:32:09, 49.29it/s]global step 550000, trans_decision ep_re 998.021429546519

{"global_step": 550000, "eval_re": [649.2417097918506, 2477.4743228622765, 
6.308214082637944, 4.592478256433909, 2927.567222698678, 2088.444740508375, 
5.440795802715566, 3.47787602290879, 1780.0943557241105, 37.572579715204036], 
"eval_len": [214, 669, 18, 17, 691, 512, 17, 17, 448, 54]}

 56%|█████▌    | 559997/1000000 [4:14:34<2:27:18, 49.78it/s]global step 560000, trans_decision ep_re 962.2082876596702

{"global_step": 560000, "eval_re": [245.40059678792122, 34.4412748614789, 
379.436785829544, 32.56389486494333, 34.05921237955939, 1300.2286769793368, 
1096.5012573577494, 2248.2301657861412, 761.0994083448865, 3490.121603405141], 
"eval_len": [110, 65, 165, 52, 115, 369, 326, 560, 301, 824]}

 57%|█████▋    | 569997/1000000 [4:18:54<2:25:36, 49.22it/s]global step 570000, trans_decision ep_re 470.98387777277475

{"global_step": 570000, "eval_re": [117.02867454201932, 20.206896354949492, 
232.43667326609852, 1039.077584759075, 1423.564219951912, 967.7172801730243, 
607.2686835173283, 20.134694530519177, 147.6356978010059, 134.76837283181487], 
"eval_len": [70, 33, 110, 301, 363, 279, 202, 80, 79, 77]}

 58%|█████▊    | 579995/1000000 [4:23:21<2:20:24, 49.85it/s]global step 580000, trans_decision ep_re 434.62684695995375

{"global_step": 580000, "eval_re": [179.91784746311316, 193.136507609382, 
373.1111287574997, 802.7622597429059, 192.90205780303958, 307.39699852828534, 
1317.8038715430184, 24.601738652640996, 19.226206891569866, 935.409852608082], 
"eval_len": [90, 101, 152, 270, 92, 130, 341, 35, 33, 323]}

 59%|█████▉    | 589997/1000000 [4:28:04<2:17:23, 49.74it/s]global step 590000, trans_decision ep_re 1246.5281032342666

{"global_step": 590000, "eval_re": [4237.327894663603, 2204.767784576535, 
1105.2293229983359, 28.439820675620734, 17.860054555293797, 20.885985614796034, 
2384.2158351208336, 2432.902852233782, 30.265885398548225, 3.385596505317032], 
"eval_len": [1000, 536, 327, 41, 29, 32, 648, 573, 141, 18]}

 60%|█████▉    | 599999/1000000 [4:32:34<2:12:42, 50.23it/s]global step 600000, trans_decision ep_re 1065.4928464038992

{"global_step": 600000, "eval_re": [33.63272080681897, 998.200208979946, 
3082.3303172714127, 1643.4939468655716, 224.79867306656493, 1239.7319786904109, 
-1.2332642626537926, 30.39463325780361, 916.7630274465961, 2486.8162219165224], 
"eval_len": [55, 297, 674, 392, 129, 450, 13, 43, 310, 593]}

 61%|██████    | 609995/1000000 [4:36:56<2:10:46, 49.70it/s]global step 610000, trans_decision ep_re 371.44733833003255

{"global_step": 610000, "eval_re": [106.23304445782124, 1729.4374015438814, 
299.06116464056686, 350.2235434981367, 23.86425582527993, 29.060488265074767, 
813.4491735764962, 25.445221207259262, 316.9888961912172, 20.710194094592183], 
"eval_len": [63, 457, 162, 134, 44, 40, 263, 41, 136, 35]}

 62%|██████▏   | 619995/1000000 [4:41:25<2:07:25, 49.70it/s]global step 620000, trans_decision ep_re 1165.9985180085478

{"global_step": 620000, "eval_re": [1437.9398983380456, 2422.4564399301707, 
4505.389442299761, 150.28839840604672, 795.1820728528706, 105.63237426209656, 
911.0793146406962, 884.6836462463611, 116.53137382456866, 330.80221928486054], 
"eval_len": [393, 597, 1000, 95, 257, 64, 256, 320, 66, 141]}

 63%|██████▎   | 629999/1000000 [4:45:59<2:04:00, 49.73it/s]global step 630000, trans_decision ep_re 728.2774123922337

{"global_step": 630000, "eval_re": [103.20239334976883, 106.92629831165698, 
19.371655227037802, 30.431267225936423, 1849.189919234544, 4797.367155830724, 
37.072306578218985, 29.723808854812994, 110.11720363328837, 199.3721156763486], 
"eval_len": [60, 54, 37, 44, 515, 992, 52, 51, 65, 105]}

 64%|██████▍   | 639995/1000000 [4:50:29<2:00:43, 49.70it/s]global step 640000, trans_decision ep_re 517.5302878567161

{"global_step": 640000, "eval_re": [19.706989762966924, 744.5406087975136, 
935.4904545405236, 173.0346964780506, 90.67649326623396, 105.4058072690936, 
115.37159226610414, 158.01816551110397, 2424.6507918291027, 408.40727884646867],
"eval_len": [37, 227, 407, 101, 66, 68, 63, 87, 559, 151]}

 65%|██████▍   | 649997/1000000 [4:54:58<1:57:43, 49.55it/s]global step 650000, trans_decision ep_re 625.9076696029631

{"global_step": 650000, "eval_re": [65.75974976780738, 1846.0461380664035, 
105.00422165003458, 78.38706534944538, 974.9507394202855, 20.395915380925523, 
1281.795128694172, 32.24526977473016, 224.73170983114656, 1629.7607580946806], 
"eval_len": [52, 451, 139, 67, 281, 30, 406, 44, 129, 417]}

 66%|██████▌   | 659999/1000000 [4:59:29<1:54:48, 49.36it/s]global step 660000, trans_decision ep_re 1036.0149638446087

{"global_step": 660000, "eval_re": [140.6812854639276, 3609.0153796602763, 
600.4574962518451, 52.874968889474836, 762.0436156347494, 2303.4814485919405, 
11.95163208387721, 25.745886143932584, 858.4476749123905, 1995.450250813673], 
"eval_len": [74, 748, 221, 89, 260, 509, 22, 46, 241, 502]}

 67%|██████▋   | 669999/1000000 [5:04:14<1:50:15, 49.88it/s]global step 670000, trans_decision ep_re 2023.6814521198717

{"global_step": 670000, "eval_re": [2054.8964398526477, 4905.892808702088, 
1613.0780324908185, 32.27620126306548, 434.0185019176167, 4809.91578158396, 
4804.522793832357, 1082.3659763957596, 19.231140481758008, 480.61684467864416], 
"eval_len": [480, 1000, 379, 46, 174, 1000, 1000, 281, 31, 176]}

 68%|██████▊   | 679995/1000000 [5:08:54<1:52:30, 47.41it/s]global step 680000, trans_decision ep_re 1392.6356731559085

{"global_step": 680000, "eval_re": [94.41227487745803, 399.6920006070726, 
421.2914862213464, 4662.7351295953995, 3410.7483914779127, 1533.405653685985, 
1125.6330310264846, 1333.9398910189068, 658.6516589428658, 285.84721410565487], 
"eval_len": [62, 186, 191, 1000, 745, 371, 328, 362, 223, 136]}

 69%|██████▉   | 689995/1000000 [5:13:24<1:43:54, 49.73it/s]global step 690000, trans_decision ep_re 1371.1089023134814

{"global_step": 690000, "eval_re": [2164.458006868921, 1979.6828134516581, 
4858.803403988802, 190.29493366379552, 2168.3004007789614, 126.43478284653682, 
166.61440475774378, 263.94257802626726, 895.5877772090216, 896.9699215431059], 
"eval_len": [535, 458, 1000, 93, 513, 81, 94, 118, 262, 270]}

 70%|██████▉   | 699999/1000000 [5:17:49<1:44:18, 47.94it/s]global step 700000, trans_decision ep_re 1260.99889764303

{"global_step": 700000, "eval_re": [1567.6086900208752, 12.445822759056693, 
25.06025822144717, 37.807689564990845, 16.581475665603765, 4270.383005014756, 
898.2663859819936, 2687.230142730249, 31.444543371763018, 3063.1609630995645], 
"eval_len": [365, 23, 40, 49, 28, 864, 263, 586, 56, 742]}

 71%|███████   | 709999/1000000 [5:22:34<3:06:02, 25.98it/s]global step 710000, trans_decision ep_re 1639.6918320554992

{"global_step": 710000, "eval_re": [3140.107519280343, 502.3676973512617, 
1304.5311136520493, 2684.137163430499, 4660.7784116298, 109.35965599324568, 
443.5301239541862, 105.42239812783242, 3096.863205433359, 349.82103170241584], 
"eval_len": [704, 207, 318, 622, 925, 62, 173, 96, 651, 203]}

 72%|███████▏  | 719995/1000000 [5:26:59<1:34:34, 49.35it/s]global step 720000, trans_decision ep_re 1237.8485135151702

{"global_step": 720000, "eval_re": [3112.682932516156, 358.25897983838536, 
19.76631139296821, 536.5112240308442, 2545.936803879689, 30.547455060221058, 
39.74668762034683, 4788.5830028688915, 442.88969166769624, 503.5620462765034], 
"eval_len": [670, 156, 40, 206, 560, 42, 60, 1000, 152, 161]}

 73%|███████▎  | 729999/1000000 [5:31:32<1:29:53, 50.06it/s]global step 730000, trans_decision ep_re 907.9115614356303

{"global_step": 730000, "eval_re": [103.05195998779426, 30.525271501710495, 
43.763538452520905, 754.8104568111987, 1185.1787027810644, 797.613951240133, 
121.60823514065478, 2317.9677462479954, 32.06747500047263, 3692.528277192758], 
"eval_len": [68, 43, 51, 238, 317, 276, 81, 538, 51, 806]}

 74%|███████▍  | 739995/1000000 [5:36:15<1:27:23, 49.58it/s]global step 740000, trans_decision ep_re 960.0072902642025

{"global_step": 740000, "eval_re": [1228.950138509349, 1020.3447845488696, 
22.99955669584661, 99.61121768937288, 3884.1872066164096, 25.580028088486387, 
2409.94423201995, 27.91285126318966, 16.179834035480468, 864.363053175071], 
"eval_len": [318, 309, 36, 166, 808, 47, 550, 48, 27, 334]}

 75%|███████▍  | 749995/1000000 [5:40:39<1:23:32, 49.87it/s]global step 750000, trans_decision ep_re 446.57555924700625

{"global_step": 750000, "eval_re": [22.55552006743656, 31.108176179281678, 
99.87203321817461, 114.6039331196487, 95.31789034692162, 14.752349808606159, 
522.8953087656164, 189.48023696580705, 3363.3825195574314, 11.78762444113796], 
"eval_len": [42, 43, 53, 64, 65, 30, 235, 95, 1000, 25]}

 76%|███████▌  | 759997/1000000 [5:45:09<1:19:49, 50.11it/s]global step 760000, trans_decision ep_re 1352.2783041019106

{"global_step": 760000, "eval_re": [2527.5878647195254, 1804.8315521490247, 
193.17319093706368, 5023.635172648169, 1508.4706305906352, 151.44386297565995, 
394.4907695530303, 1861.8586007973279, 43.42470675270673, 13.866689895965898], 
"eval_len": [555, 418, 100, 1000, 360, 83, 141, 420, 66, 26]}

 77%|███████▋  | 769995/1000000 [5:49:55<1:16:45, 49.94it/s]global step 770000, trans_decision ep_re 2207.7951941333276

{"global_step": 770000, "eval_re": [5217.799008533935, 627.4107953076928, 
4324.766996129647, 1150.0700963884603, 1508.8578932676876, 39.06908197353426, 
4850.543282373884, 3275.864132051706, 144.8734326902098, 938.6972226165183], 
"eval_len": [1000, 241, 853, 289, 364, 69, 1000, 672, 90, 294]}

 78%|███████▊  | 779999/1000000 [5:54:35<1:13:30, 49.88it/s]global step 780000, trans_decision ep_re 1621.6003844083712

{"global_step": 780000, "eval_re": [1899.9054633064789, 2682.41899884323, 
1002.9677394697142, 4886.126049694172, 233.569887161699, 861.0712368508092, 
427.82888817195675, 1578.4281616875728, 2595.9257740540183, 47.76164484406144], 
"eval_len": [540, 588, 302, 1000, 128, 281, 165, 533, 585, 105]}

 79%|███████▉  | 789999/1000000 [5:59:15<1:16:12, 45.93it/s]global step 790000, trans_decision ep_re 1788.9929142708054

{"global_step": 790000, "eval_re": [502.159964942756, 3868.789318519447, 
30.223029701535964, 16.626707466628897, 2519.164962281944, 210.75228784705172, 
4852.145980420742, 4862.4321816136835, 697.0260089823119, 330.60870093195155], 
"eval_len": [200, 789, 41, 27, 622, 105, 1000, 1000, 213, 136]}

 80%|███████▉  | 799997/1000000 [6:03:45<1:07:02, 49.73it/s]global step 800000, trans_decision ep_re 691.1734397775236

{"global_step": 800000, "eval_re": [399.19105359779246, 1078.0341209870662, 
112.6817855317336, 428.245431593499, 695.0631199022152, 518.3799806889658, 
2957.153638915093, 249.51783701838534, 19.689928101974917, 453.77750143851057], 
"eval_len": [137, 334, 80, 151, 218, 176, 618, 144, 42, 151]}

 81%|████████  | 809995/1000000 [6:08:15<1:03:32, 49.84it/s]global step 810000, trans_decision ep_re 898.713073861037

{"global_step": 810000, "eval_re": [31.97149631541205, 8.44447511747625, 
1742.6698359794352, 13.42251820336531, 924.6618741600552, 2364.25276361843, 
29.57375278588284, 65.89015777807218, 3790.6899077086596, 15.553956943580738], 
"eval_len": [55, 24, 400, 22, 330, 573, 51, 87, 784, 27]}

 82%|████████▏ | 819999/1000000 [6:12:36<59:51, 50.13it/s]  global step 820000, trans_decision ep_re 1375.1781022759221

{"global_step": 820000, "eval_re": [5191.0618312517945, 3532.6874159625595, 
848.3857332544113, 618.0473870253522, 128.64158601975723, 210.93649850383656, 
119.14550395538359, 326.60049075021465, 2653.19265287587, 123.08192316004225], 
"eval_len": [963, 692, 220, 243, 70, 168, 77, 119, 543, 70]}

 83%|████████▎ | 829999/1000000 [6:17:08<57:34, 49.21it/s]global step 830000, trans_decision ep_re 752.7757603195126

{"global_step": 830000, "eval_re": [116.2547732086231, 2721.7068260366514, 
844.3537673569542, 102.08131815842827, 514.7155356972669, 487.9769949149049, 
474.50869964819685, 1221.1730029350124, 17.083450193898315, 1027.9032350451907],
"eval_len": [75, 587, 274, 69, 191, 179, 181, 314, 23, 275]}

 84%|████████▍ | 839996/1000000 [6:21:38<53:27, 49.88it/s]global step 840000, trans_decision ep_re 1065.4850454722666

{"global_step": 840000, "eval_re": [169.6970810969293, 153.546682904873, 
424.7776510153524, 108.19748070931075, 159.41443212263917, 3909.221585992132, 
179.98515910457797, 1023.5020585234427, 181.7178005097332, 4344.790522743677], 
"eval_len": [94, 92, 170, 78, 93, 900, 100, 300, 96, 877]}

 85%|████████▍ | 849995/1000000 [6:26:09<50:02, 49.96it/s]global step 850000, trans_decision ep_re 247.0543430813531

{"global_step": 850000, "eval_re": [955.028042703125, 557.9650113019557, 
14.785149932224698, 104.32083303763082, 113.71364124292525, 123.55227278173672, 
114.11502001381639, 23.736326473015627, 23.51541205300782, 439.81172127409326], 
"eval_len": [404, 190, 30, 68, 69, 111, 66, 38, 38, 159]}

 86%|████████▌ | 859999/1000000 [6:30:36<46:45, 49.90it/s]global step 860000, trans_decision ep_re 599.1079154828817

{"global_step": 860000, "eval_re": [1379.580325103177, 701.561929427881, 
531.2840791226333, 119.68582577024918, 559.014999509605, 33.01234233329369, 
2342.116984375153, 113.65463263147188, 103.1250722595729, 108.04296429577994], 
"eval_len": [337, 231, 182, 70, 214, 48, 487, 79, 68, 63]}

 87%|████████▋ | 869999/1000000 [6:35:15<43:23, 49.94it/s]global step 870000, trans_decision ep_re 2234.833540169866

{"global_step": 870000, "eval_re": [838.3576641641647, 3207.915211224569, 
77.3659130991815, 2153.27038044118, 3118.023118991204, 4793.054607421953, 
-0.1574806806742235, 4008.900450745795, 4134.122395706257, 17.48314058503257], 
"eval_len": [231, 667, 105, 507, 652, 1000, 17, 873, 827, 23]}

 88%|████████▊ | 879999/1000000 [6:39:41<39:51, 50.17it/s]global step 880000, trans_decision ep_re 1226.8482072082347

{"global_step": 880000, "eval_re": [1848.8809479184733, 23.98008528317261, 
175.9257578478218, 107.66631042340467, 2905.7878842297937, 782.5900941311368, 
446.4167242370148, 4256.730762600924, 1691.1650310515224, 29.338474359083854], 
"eval_len": [445, 35, 120, 65, 677, 254, 179, 814, 434, 40]}

 89%|████████▉ | 889999/1000000 [6:44:12<36:59, 49.56it/s]global step 890000, trans_decision ep_re 1066.857310042527

{"global_step": 890000, "eval_re": [124.74552498513744, 458.8655169110271, 
108.59421651821809, 85.99397014582229, 2722.879756108591, 90.2240369483155, 
5345.150821463163, 93.66152342164281, 866.8490066412164, 771.6087272821356], 
"eval_len": [66, 179, 67, 64, 554, 58, 1000, 60, 286, 216]}

 90%|████████▉ | 899999/1000000 [6:48:43<33:45, 49.38it/s]global step 900000, trans_decision ep_re 310.2894370007222

{"global_step": 900000, "eval_re": [1222.7917901086805, 1031.9519678428865, 
102.96005247428054, 110.36304284672696, 95.02340020392961, 112.23377889697095, 
100.23887642025028, 119.80844100277494, 104.30996342236963, 103.21305678835209],
"eval_len": [342, 340, 56, 63, 64, 60, 54, 72, 61, 56]}

 91%|█████████ | 909995/1000000 [6:53:10<30:09, 49.75it/s]global step 910000, trans_decision ep_re 592.4916768334703

{"global_step": 910000, "eval_re": [55.496635701706765, 54.18196188687757, 
94.14908558687681, 97.41585200629311, 96.5974497815885, 70.89034379925107, 
211.61634032633842, 18.878897866749664, 4114.930341975607, 1110.7598594034139], 
"eval_len": [66, 62, 52, 212, 62, 69, 113, 34, 841, 283]}

 92%|█████████▏| 919997/1000000 [6:57:55<26:48, 49.72it/s]global step 920000, trans_decision ep_re 2020.1281215174502

{"global_step": 920000, "eval_re": [4335.266205448227, 48.12169367165648, 
4981.892364196249, 350.04144072858725, 0.21590179918134633, 44.30014204129875, 
329.2053756667378, 4949.069676444021, 1245.8348234637062, 3917.333591714839], 
"eval_len": [852, 62, 1000, 146, 17, 54, 185, 965, 339, 865]}

 93%|█████████▎| 929997/1000000 [7:02:25<23:21, 49.93it/s]global step 930000, trans_decision ep_re 1144.286432636504

{"global_step": 930000, "eval_re": [1481.3982015095785, 602.1160669918384, 
3588.3582737416077, 20.83439215632965, 11.1777728967898, 116.14780329528001, 
2779.8178068021075, 2066.6421028276263, 43.67935346973241, 732.6925526741514], 
"eval_len": [461, 184, 775, 32, 21, 68, 642, 467, 72, 212]}

 94%|█████████▍| 939995/1000000 [7:06:43<20:05, 49.77it/s]global step 940000, trans_decision ep_re 283.7679287328926

{"global_step": 940000, "eval_re": [15.36700806805634, 59.53095059195097, 
697.7697469357424, 1465.2062205959926, 105.32024026188184, 95.53374225617074, 
103.48510801311484, 95.65771221161924, 98.43789774342783, 101.37066065096926], 
"eval_len": [23, 94, 206, 362, 62, 60, 60, 61, 60, 60]}

 95%|█████████▍| 949999/1000000 [7:11:09<16:37, 50.11it/s]global step 950000, trans_decision ep_re 602.3644745316851

{"global_step": 950000, "eval_re": [3.1426192206014734, 145.1340136534131, 
200.7001722462033, 33.9128489146706, 475.30367747396184, 1428.4180200455742, 
42.66976607520441, 1037.0627485329128, 303.60213424897097, 2353.6987449053386], 
"eval_len": [165, 74, 108, 43, 216, 334, 66, 293, 130, 501]}

 96%|█████████▌| 959997/1000000 [7:15:38<13:34, 49.14it/s]global step 960000, trans_decision ep_re 792.998436609871

{"global_step": 960000, "eval_re": [16.56088911779916, 137.52168603304068, 
4850.145189050059, 446.1109439262635, 17.244906949293163, 22.91770492399422, 
513.0607816180724, 866.8271731439153, 34.35801795224441, 1025.2370733840282], 
"eval_len": [27, 72, 1000, 172, 28, 37, 178, 244, 45, 279]}

 97%|█████████▋| 969999/1000000 [7:20:06<10:05, 49.51it/s]global step 970000, trans_decision ep_re 1342.5771999217147

{"global_step": 970000, "eval_re": [3269.9861307978663, 1717.1998055746271, 
3382.0349247903437, 146.48682671444612, -2.10186908818777, 2201.9543169931076, 
1936.4097488014618, 205.25483045275254, 543.2083349062608, 25.338949274469407], 
"eval_len": [669, 429, 732, 193, 16, 486, 452, 119, 199, 37]}

 98%|█████████▊| 979997/1000000 [7:24:56<06:40, 49.92it/s]global step 980000, trans_decision ep_re 1982.5672552195924

{"global_step": 980000, "eval_re": [21.66189417105366, 3905.365249079486, 
2041.1100299945547, 352.0086232488122, 3914.419311565727, 238.35285439730765, 
3709.0593312728984, 126.4559514600313, 2065.2851126281375, 3451.9541943779145], 
"eval_len": [37, 784, 484, 143, 881, 104, 743, 72, 498, 804]}

 99%|█████████▉| 989995/1000000 [7:29:14<03:19, 50.09it/s]global step 990000, trans_decision ep_re 692.2810646884466

{"global_step": 990000, "eval_re": [908.734147215546, 1954.1081626669893, 
84.3501690151197, 108.1245141428041, 48.667194531581416, 3545.9756887055223, 
21.623355454576107, 84.23650732314316, 79.31222284080128, 87.67868498838371], 
"eval_len": [281, 514, 63, 62, 75, 785, 34, 163, 78, 81]}

100%|█████████▉| 999999/1000000 [7:33:56<00:00, 50.17it/s]global step 1000000, trans_decision ep_re 1495.9063083444587

{"global_step": 1000000, "eval_re": [2689.9602716377585, 2329.4838028906956, 
970.481110538043, 37.29737868299198, 2243.659986009956, 893.3383388390496, 
87.00433133390703, 2045.0518576042266, 893.431564282995, 2769.3544416249642], 
"eval_len": [688, 495, 269, 58, 701, 241, 63, 447, 291, 619]}

100%|██████████| 1000000/1000000 [7:33:59<00:00, 36.71it/s]
