
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 2,
    'delayspec': 'ExtremeClogL1U23::markov(ord(15,1), ord(3,5,3,shift=22), 
[[124, 1], [1, 19]])'
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [02:53<6:22:21, 43.15it/s]global step 10000, trans_decision ep_re 234.8198779328515

{"global_step": 10000, "eval_re": [229.18958227742877, 229.37047045641677, 
235.1546641799126, 228.00438263521224, 228.53088331699578, 229.00608012247318, 
228.6866856434637, 172.43182628788963, 178.56305558717867, 389.2611488215439], 
"eval_len": [42, 42, 43, 42, 42, 42, 42, 33, 34, 73]}

  2%|▏         | 19995/1000000 [08:35<6:35:23, 41.31it/s]global step 20000, trans_decision ep_re 376.96755361804054

{"global_step": 20000, "eval_re": [386.06108910014234, 388.18491570446685, 
363.54442491607756, 353.9112993044744, 348.66530045551946, 456.1345501167724, 
350.97252771771923, 395.3305763404027, 361.1465724210039, 365.72428010382623], 
"eval_len": [70, 70, 66, 65, 64, 99, 64, 72, 66, 67]}

  3%|▎         | 29995/1000000 [14:07<5:53:53, 45.68it/s]global step 30000, trans_decision ep_re 376.3621040512013

{"global_step": 30000, "eval_re": [455.54964226617693, 338.6935737989159, 
464.096569851463, 351.58096224705105, 404.0752855103254, 373.75135692208704, 
355.5552431336897, 177.50215370685646, 362.52003721586374, 480.29621585958364], 
"eval_len": [91, 70, 88, 73, 82, 68, 74, 34, 75, 92]}

  4%|▍         | 39999/1000000 [19:50<7:44:04, 34.48it/s]global step 40000, trans_decision ep_re 372.69081512206435

{"global_step": 40000, "eval_re": [304.98634473018666, 403.0319675540422, 
372.39789871190163, 287.91410105348507, 348.85229341384814, 307.665062411057, 
332.3056058817458, 550.7726117843347, 445.9785511415548, 373.0037145384872], 
"eval_len": [64, 83, 75, 57, 69, 62, 67, 112, 91, 78]}

  5%|▍         | 49999/1000000 [25:04<7:37:36, 34.60it/s]global step 50000, trans_decision ep_re 438.19465588304286

{"global_step": 50000, "eval_re": [664.3223500819327, 425.7288140262139, 
348.264502509393, 368.7045587050969, 609.9016584561249, 390.9527346739433, 
365.0439371893866, 354.9083409185319, 394.8748518730134, 459.24481039679154], 
"eval_len": [140, 91, 70, 72, 117, 74, 67, 74, 76, 87]}

  6%|▌         | 59998/1000000 [30:40<5:55:14, 44.10it/s]global step 60000, trans_decision ep_re 415.5241559145699

{"global_step": 60000, "eval_re": [293.4054086387178, 312.29261886561727, 
526.9035044521269, 307.3189998476543, 274.0419542907469, 684.0342191690543, 
282.68067566069345, 352.6309624752724, 469.2550986903739, 652.6781170554425], 
"eval_len": [57, 61, 95, 60, 54, 130, 55, 66, 88, 122]}

  7%|▋         | 69997/1000000 [36:12<5:33:42, 46.45it/s]global step 70000, trans_decision ep_re 650.5733368138773

{"global_step": 70000, "eval_re": [800.2280911169579, 589.498976492572, 
646.6763866683332, 594.8644157254931, 673.5896773840901, 824.4876302772719, 
541.4320661808689, 454.6645880515737, 654.7779572351035, 725.5135790065087], 
"eval_len": [168, 127, 123, 125, 133, 158, 116, 93, 125, 142]}

  8%|▊         | 79998/1000000 [41:43<5:45:06, 44.43it/s]global step 80000, trans_decision ep_re 483.4392616539767

{"global_step": 80000, "eval_re": [666.7252733769315, 162.54114128809897, 
470.63860656284317, 542.4710689195371, 473.56760418609946, 278.2622893547526, 
590.1087015575368, 590.0141510068984, 502.6328811610409, 557.4308991260285], 
"eval_len": [128, 32, 84, 103, 88, 52, 111, 110, 95, 106]}

  9%|▉         | 89996/1000000 [47:23<5:36:01, 45.14it/s]global step 90000, trans_decision ep_re 598.1722509984775

{"global_step": 90000, "eval_re": [688.0848473391337, 371.6876509867863, 
1032.9691795141416, 360.7748881170447, 438.210619784329, 407.60010431657895, 
536.3256415380291, 807.101300750443, 537.8369289006104, 801.1313487376785], 
"eval_len": [142, 75, 202, 77, 91, 77, 107, 166, 103, 160]}

 10%|▉         | 99999/1000000 [53:04<7:08:59, 34.97it/s]global step 100000, trans_decision ep_re 660.962910930058

{"global_step": 100000, "eval_re": [799.3261317352751, 469.6381771547362, 
658.1285767492697, 799.8494977543338, 833.160932875705, 794.8864829493139, 
634.388060827245, 478.8085145723547, 199.90877228859532, 941.5339623937516], 
"eval_len": [155, 99, 138, 165, 170, 162, 134, 101, 38, 184]}

 11%|█         | 109998/1000000 [58:45<7:13:57, 34.18it/s]global step 110000, trans_decision ep_re 610.6703830890986

{"global_step": 110000, "eval_re": [689.3777331584336, 706.6731496985558, 
689.6638432831636, 543.2615277023529, 710.7609405909606, 601.3211108574876, 
572.0719132445874, 548.0001371951116, 468.9840520031686, 576.5894231571646], 
"eval_len": [132, 132, 142, 102, 136, 123, 107, 104, 92, 107]}

 12%|█▏        | 119998/1000000 [1:04:31<5:30:56, 44.32it/s]global step 120000, trans_decision ep_re 519.8632042109961

{"global_step": 120000, "eval_re": [624.0070830641437, 467.2532668529806, 
412.2050223185212, 161.04114765730338, 813.1547196221887, 636.797954182138, 
539.0657154864932, 544.4117174125099, 516.701852128465, 483.9935633852167], 
"eval_len": [121, 91, 77, 32, 149, 119, 102, 103, 97, 92]}

 13%|█▎        | 129997/1000000 [1:10:20<5:30:55, 43.82it/s]global step 130000, trans_decision ep_re 806.2493706012584

{"global_step": 130000, "eval_re": [345.65053368625337, 838.1383929892061, 
693.071496146408, 523.3665448601184, 1087.7788547507232, 949.7686418383515, 
954.7417448624934, 184.89452369741812, 995.4234487546381, 1489.659524426973], 
"eval_len": [64, 163, 142, 109, 212, 189, 197, 39, 192, 297]}

 14%|█▍        | 139996/1000000 [1:15:42<5:37:54, 42.42it/s]global step 140000, trans_decision ep_re 780.6879605945068

{"global_step": 140000, "eval_re": [963.1506779576735, 742.4968123227719, 
658.717672815033, 1054.4596441363167, 598.7596997408829, 568.0731212246063, 
908.5310117055388, 1081.6338551253968, 696.398863880093, 534.6582470367548], 
"eval_len": [187, 141, 126, 209, 113, 105, 174, 203, 130, 102]}

 15%|█▍        | 149996/1000000 [1:21:20<5:19:58, 44.27it/s]global step 150000, trans_decision ep_re 592.4355674981042

{"global_step": 150000, "eval_re": [686.3279221362379, 303.03718200478585, 
558.2900421339859, 328.7669170659657, 553.9590404917013, 494.37417260747253, 
778.2029498588407, 780.2585951251879, 883.2787149168774, 557.8601386399877], 
"eval_len": [132, 55, 116, 66, 115, 97, 163, 147, 171, 110]}

 16%|█▌        | 159997/1000000 [1:26:43<5:48:27, 40.18it/s]global step 160000, trans_decision ep_re 805.5477216820228

{"global_step": 160000, "eval_re": [717.0370124316026, 571.7270409549317, 
649.9482450758856, 1194.290335716728, 709.3081511928228, 745.6166433199193, 
798.4218532807091, 451.9044522328005, 1481.3705287757282, 735.8529538390994], 
"eval_len": [137, 112, 120, 237, 134, 148, 151, 94, 282, 139]}

 17%|█▋        | 169997/1000000 [1:32:28<4:56:14, 46.70it/s]global step 170000, trans_decision ep_re 623.4756481621346

{"global_step": 170000, "eval_re": [773.4659983919039, 686.9833688430816, 
714.365811245256, 346.28332514509344, 899.2231995327389, 469.9752985413845, 
390.2630698988905, 446.10660076248115, 835.4523731793776, 672.6374360811376], 
"eval_len": [144, 142, 134, 73, 195, 99, 76, 81, 159, 127]}

 18%|█▊        | 179996/1000000 [1:38:20<4:53:32, 46.56it/s]global step 180000, trans_decision ep_re 795.2915284678484

{"global_step": 180000, "eval_re": [1001.4906588188329, 969.2561303482292, 
761.4832278056217, 1252.09259540807, 623.0528564295817, 414.6542112574636, 
914.966820552095, 295.58547125662204, 1121.9917584154512, 598.3415543865169], 
"eval_len": [207, 184, 144, 240, 127, 87, 166, 54, 228, 111]}

 19%|█▉        | 189995/1000000 [1:43:52<4:52:22, 46.17it/s]global step 190000, trans_decision ep_re 744.2303850438366

{"global_step": 190000, "eval_re": [542.3181549582143, 1584.9333214283254, 
745.5893164542466, 474.4631473789006, 748.0551433164613, 607.7024327041383, 
712.3854679301287, 1134.5260896554576, 724.7345414442426, 167.59623516825013], 
"eval_len": [111, 319, 145, 104, 154, 126, 150, 232, 148, 33]}

 20%|█▉        | 199995/1000000 [1:49:33<5:23:58, 41.16it/s]global step 200000, trans_decision ep_re 970.6312737302687

{"global_step": 200000, "eval_re": [183.48083932936441, 1144.7052252546828, 
1609.9286457298556, 1609.3003584591854, 404.0998005530805, 1054.9150418777242, 
1423.9367729721807, 897.1599862362856, 893.3536195798805, 485.4324473104474], 
"eval_len": [35, 219, 306, 321, 73, 203, 274, 169, 177, 106]}

 21%|██        | 209999/1000000 [1:55:07<4:43:14, 46.49it/s]global step 210000, trans_decision ep_re 769.193667249108

{"global_step": 210000, "eval_re": [1673.212542268077, 548.7321673494248, 
425.1929359588589, 828.9038761394019, 255.54177188733593, 914.3097211056801, 
1136.2589918497024, 821.16478410269, 395.9313369707139, 692.6885448591951], 
"eval_len": [316, 103, 85, 161, 55, 171, 228, 167, 74, 126]}

 22%|██▏       | 219999/1000000 [2:00:34<5:12:54, 41.54it/s]global step 220000, trans_decision ep_re 471.9312621781879

{"global_step": 220000, "eval_re": [559.3289547158072, 206.81040379873366, 
414.55075353221036, 332.9724477217754, 541.1785397904122, 591.0186258648886, 
660.8063416654566, 392.1780291179574, 524.5785335938696, 495.8899919807678], 
"eval_len": [118, 44, 76, 61, 111, 123, 137, 78, 110, 103]}

 23%|██▎       | 229996/1000000 [2:06:11<6:07:20, 34.94it/s]global step 230000, trans_decision ep_re 1202.9892535669408

{"global_step": 230000, "eval_re": [880.6240976170366, 982.9882924615629, 
1300.9516663216982, 563.3773011363841, 1692.3334915759326, 577.5188151279351, 
1484.9176314364079, 1900.4540931588688, 1446.8862681194441, 1199.8408787141361],
"eval_len": [164, 179, 246, 100, 316, 108, 285, 366, 270, 221]}

 24%|██▍       | 239998/1000000 [2:11:44<4:41:00, 45.08it/s]global step 240000, trans_decision ep_re 1048.273950846329

{"global_step": 240000, "eval_re": [833.4618994445742, 1122.6773975812619, 
1067.4023241732932, 1900.3124191286527, 788.0627706511748, 882.1806911498926, 
1329.0768370580176, 664.0222900829327, 940.16940452675, 955.3734746667404], 
"eval_len": [160, 221, 200, 369, 147, 160, 260, 130, 188, 190]}

 25%|██▍       | 249999/1000000 [2:17:18<4:37:18, 45.08it/s]global step 250000, trans_decision ep_re 742.6562760518568

{"global_step": 250000, "eval_re": [426.2236710662252, 167.01923157541455, 
1276.7042518454884, 278.2593662840187, 1045.7919924946127, 572.8310005849054, 
525.6775727864242, 1352.4806884019217, 690.5291084358761, 1091.045877043681], 
"eval_len": [88, 32, 236, 52, 198, 105, 102, 265, 138, 219]}

 26%|██▌       | 259995/1000000 [2:22:27<4:28:21, 45.96it/s]global step 260000, trans_decision ep_re 702.8584733222526

{"global_step": 260000, "eval_re": [1164.5230509257897, 810.3899744002215, 
550.6546444751312, 296.1718764099479, 584.3886249714789, 1106.8934025456622, 
446.40252797607394, 463.9063814438381, 865.3814882679726, 739.8727618064106], 
"eval_len": [219, 140, 104, 54, 118, 202, 82, 84, 161, 137]}

 27%|██▋       | 269996/1000000 [2:27:23<4:20:49, 46.65it/s]global step 270000, trans_decision ep_re 845.609037940793

{"global_step": 270000, "eval_re": [891.5675140856894, 690.5651490408146, 
1057.8986000177379, 720.8446536573213, 1119.3659208396489, 585.0562561531002, 
618.8787720432886, 887.0902440235292, 634.2856987334894, 1250.5375708133101], 
"eval_len": [171, 147, 201, 133, 214, 119, 126, 167, 130, 249]}

 28%|██▊       | 279998/1000000 [2:32:25<4:22:50, 45.66it/s]global step 280000, trans_decision ep_re 1114.852421754415

{"global_step": 280000, "eval_re": [668.9602520411659, 791.7490943896698, 
167.39333213789357, 2689.4064684672203, 896.9237201662005, 548.7983289593117, 
1746.3972565267838, 799.7317024252771, 1704.8834168109133, 1134.2806456197147], 
"eval_len": [118, 144, 32, 512, 165, 97, 340, 149, 326, 211]}

 29%|██▉       | 289996/1000000 [2:37:34<4:13:53, 46.61it/s]global step 290000, trans_decision ep_re 1388.522453441568

{"global_step": 290000, "eval_re": [1924.9371188777454, 1512.4032555005717, 
1290.273130317196, 678.7904074509005, 3309.697289774029, 695.5799649253885, 
552.9569460446925, 1176.9432273973248, 1426.5297141033127, 1317.1134800245181], 
"eval_len": [350, 285, 242, 126, 594, 130, 110, 218, 258, 245]}

 30%|██▉       | 299997/1000000 [2:43:21<4:38:50, 41.84it/s]global step 300000, trans_decision ep_re 1874.1099990089035

{"global_step": 300000, "eval_re": [698.8782781372962, 4844.817896196395, 
453.6344496145684, 4780.98692143148, 2055.5943553627317, 2288.2578161101055, 
662.9663465753343, 978.7068983815157, 1039.4080834053657, 937.8489448742374], 
"eval_len": [139, 924, 95, 924, 393, 447, 119, 187, 206, 182]}

 31%|███       | 309995/1000000 [2:49:21<4:12:18, 45.58it/s]global step 310000, trans_decision ep_re 1490.5356948628628

{"global_step": 310000, "eval_re": [2966.060508688866, 376.99887370937876, 
1483.1277818380206, 468.08831581397766, 2642.672345569231, 849.7809883338697, 
1182.091331308102, 2017.574583955669, 806.600366586517, 2112.3618528249963], 
"eval_len": [585, 76, 297, 93, 535, 165, 227, 400, 165, 431]}

 32%|███▏      | 319998/1000000 [2:55:11<4:14:14, 44.58it/s]global step 320000, trans_decision ep_re 1350.7276205782005

{"global_step": 320000, "eval_re": [305.31414587086584, 2550.0804524359137, 
3172.1932195437957, 653.212153442407, 394.1187416297088, 831.3256259044688, 
682.9678706728718, 1279.0062488016206, 2044.1696430228847, 1594.888104457467], 
"eval_len": [56, 479, 591, 136, 78, 164, 126, 237, 387, 291]}

 33%|███▎      | 329998/1000000 [3:00:43<4:12:19, 44.25it/s]global step 330000, trans_decision ep_re 1132.7841787698303

{"global_step": 330000, "eval_re": [1235.563234781255, 729.7193412756277, 
2545.569552881777, 294.0021901728095, 598.1456317844547, 618.9973892719414, 
1313.2401026652342, 941.89963558421, 1634.729007806922, 1415.9757014740699], 
"eval_len": [219, 143, 490, 58, 123, 123, 262, 187, 322, 263]}

 34%|███▍      | 339997/1000000 [3:06:02<4:12:08, 43.63it/s]global step 340000, trans_decision ep_re 1263.352241423941

{"global_step": 340000, "eval_re": [2741.746819021335, 400.90538722664087, 
861.5491824983493, 1072.7892215536856, 2414.439633621196, 922.4115838396772, 
888.1119343605735, 1053.4131512230472, 1084.4018988110888, 1193.7536020838154], 
"eval_len": [499, 71, 159, 196, 441, 174, 172, 189, 193, 218]}

 35%|███▍      | 349996/1000000 [3:11:41<3:49:32, 47.20it/s]global step 350000, trans_decision ep_re 1376.7619589767953

{"global_step": 350000, "eval_re": [1037.524082004877, 309.46096658190413, 
282.65139364676025, 1946.7307965762755, 1633.2106848023002, 243.37115125367572, 
1402.278307903948, 2865.311454510166, 507.74412345159146, 3539.336629036456], 
"eval_len": [198, 57, 52, 367, 305, 48, 251, 526, 94, 633]}

 36%|███▌      | 359996/1000000 [3:17:01<3:59:29, 44.54it/s]global step 360000, trans_decision ep_re 1292.665773639817

{"global_step": 360000, "eval_re": [1663.5425570978919, 643.6133901446258, 
1067.099776928498, 1080.927904887859, 2256.2280467079004, 399.60796151971226, 
1357.6684660160372, 721.095449465593, 2281.976363252263, 1454.897820377789], 
"eval_len": [312, 118, 200, 199, 430, 73, 253, 137, 428, 266]}

 37%|███▋      | 369999/1000000 [3:22:41<5:10:50, 33.78it/s]global step 370000, trans_decision ep_re 1720.6778141169543

{"global_step": 370000, "eval_re": [1576.7590060334223, 1057.4079504111453, 
2419.6942177274746, 587.8669106019308, 156.42046212584418, 3068.417991975451, 
168.34623583364677, 1165.8515926904786, 5688.318264216226, 1317.6955095539245], 
"eval_len": [278, 190, 425, 110, 31, 533, 32, 211, 1000, 233]}

 38%|███▊      | 379999/1000000 [3:28:21<3:41:39, 46.62it/s]global step 380000, trans_decision ep_re 2123.148987686375

{"global_step": 380000, "eval_re": [3171.6505291825697, 3668.083566434091, 
1028.054851575255, 1698.2426645264022, 774.4519602108195, 2392.0217624618754, 
1701.1098986632414, 469.88605063537034, 2454.3206819396078, 3873.6679112345223],
"eval_len": [584, 660, 184, 298, 134, 420, 304, 89, 439, 720]}

 39%|███▉      | 389999/1000000 [3:33:07<3:40:54, 46.02it/s]global step 390000, trans_decision ep_re 1189.8650619172292

{"global_step": 390000, "eval_re": [2303.5985641474595, 590.2756268415442, 
1832.4794575968083, 1659.0444323842626, 403.4958129034882, 183.0343875881704, 
1558.5275215588383, 1400.3693309208647, 1687.9085637832648, 279.9169214475919], 
"eval_len": [414, 102, 322, 299, 74, 35, 285, 258, 296, 56]}

 40%|███▉      | 399995/1000000 [3:38:05<3:37:55, 45.89it/s]global step 400000, trans_decision ep_re 1131.3289021685962

{"global_step": 400000, "eval_re": [1316.491680301924, 1577.7815402047615, 
930.4480374144255, 526.3874120908877, 1813.025947995267, 1841.1771639197061, 
2217.8743196693645, 321.9030985676112, 157.06499039112973, 611.134831130885], 
"eval_len": [235, 287, 168, 94, 320, 310, 387, 61, 30, 110]}

 41%|████      | 409999/1000000 [3:43:02<3:33:19, 46.09it/s]global step 410000, trans_decision ep_re 639.3091910612932

{"global_step": 410000, "eval_re": [431.5239070097321, 336.8851078710505, 
1160.25748712823, 2373.9936543669965, 358.7260308159944, 211.80743911670092, 
973.9890140709881, 177.86980352362795, 194.6341603027748, 173.40530640683662], 
"eval_len": [78, 58, 202, 420, 66, 41, 177, 34, 37, 33]}

 42%|████▏     | 419996/1000000 [3:47:55<3:27:58, 46.48it/s]global step 420000, trans_decision ep_re 989.388198744803

{"global_step": 420000, "eval_re": [136.1982237957676, 224.63171314296676, 
1970.3097465286871, 169.02311697094026, 2695.0044755932913, 353.5212454792048, 
1473.776276658343, 1484.5274305347282, 714.913445334043, 671.9763134100584], 
"eval_len": [26, 43, 347, 32, 489, 67, 256, 275, 130, 124]}

 43%|████▎     | 429998/1000000 [3:52:52<3:23:27, 46.69it/s]global step 430000, trans_decision ep_re 1687.9278586310015

{"global_step": 430000, "eval_re": [384.36864906198934, 1113.7580118997746, 
429.74475428565626, 942.1259519258166, 820.511451969879, 2389.730243591849, 
4362.7530475154845, 2773.098264616734, 2057.8827597474105, 1605.3054516954207], 
"eval_len": [74, 202, 78, 170, 147, 430, 764, 493, 367, 285]}

 44%|████▍     | 439999/1000000 [3:58:01<3:23:33, 45.85it/s]global step 440000, trans_decision ep_re 1976.3951531183643

{"global_step": 440000, "eval_re": [1073.1720965643458, 361.12801699906373, 
1410.692911322129, 3535.4289300559094, 785.3130179045472, 1350.6312693919863, 
1224.5311702543672, 152.35707831585285, 5794.095519947876, 4076.601520427568], 
"eval_len": [181, 67, 248, 611, 151, 232, 206, 29, 1000, 724]}

 45%|████▍     | 449997/1000000 [4:02:53<3:18:39, 46.14it/s]global step 450000, trans_decision ep_re 2124.384413537681

{"global_step": 450000, "eval_re": [5877.3688196092835, 891.0758505512256, 
2654.9322359071934, 2058.588933400525, 2129.8402829618544, 2068.6745822338007, 
1475.6456370545516, 423.6600848008319, 2887.985878470692, 776.071830386856], 
"eval_len": [1000, 166, 460, 358, 367, 360, 266, 77, 498, 135]}

 46%|████▌     | 459999/1000000 [4:07:54<3:15:22, 46.07it/s]global step 460000, trans_decision ep_re 884.7336567185317

{"global_step": 460000, "eval_re": [1829.598888723999, 1833.2368867939467, 
204.80511566217044, 631.1744045665358, 1413.7979989639903, 192.7699830991074, 
226.04941629866406, 183.60006543503516, 499.249836677949, 1833.0539709639193], 
"eval_len": [326, 328, 39, 118, 251, 37, 42, 35, 99, 323]}

 47%|████▋     | 469999/1000000 [4:13:01<3:12:22, 45.92it/s]global step 470000, trans_decision ep_re 1643.6289368588375

{"global_step": 470000, "eval_re": [1063.438085150763, 1774.3139096704783, 
1767.0783952260256, 2281.5683036085898, 308.8833623860602, 528.7865541169976, 
3193.42941272747, 2230.921434038833, 982.6398351549495, 2305.2300765082073], 
"eval_len": [187, 295, 296, 390, 60, 100, 520, 358, 174, 390]}

 48%|████▊     | 479998/1000000 [4:18:01<3:05:57, 46.60it/s]global step 480000, trans_decision ep_re 1716.0942222582557

{"global_step": 480000, "eval_re": [328.231246114169, 3065.7211126431575, 
136.18828983460276, 745.8434256178692, 2420.3861935165596, 1744.0600639735303, 
1382.082542716909, 834.4105533525131, 2677.5713877478765, 3826.447407065372], 
"eval_len": [62, 531, 26, 132, 411, 326, 247, 147, 462, 651]}

 49%|████▉     | 489998/1000000 [4:23:01<3:03:20, 46.36it/s]global step 490000, trans_decision ep_re 1466.6394198266303

{"global_step": 490000, "eval_re": [1789.4030031832244, 1017.7682537541392, 
1158.272880225624, 1445.7980188123158, 320.4509190408353, 3524.632015660147, 
1452.9559622280885, 1695.0372917954849, 376.05965394151406, 1886.01619962493], 
"eval_len": [328, 184, 209, 267, 61, 603, 268, 296, 71, 331]}

 50%|████▉     | 499996/1000000 [4:27:45<2:59:03, 46.54it/s]global step 500000, trans_decision ep_re 1011.442729477629

{"global_step": 500000, "eval_re": [167.85455294360358, 160.75371802853343, 
539.3379132680602, 125.85171606039995, 536.6184140401086, 4780.91999960569, 
1430.3260039478419, 1535.1190001711277, 363.3188989331405, 474.32707777778273], 
"eval_len": [32, 31, 95, 24, 100, 801, 244, 268, 67, 83]}

 51%|█████     | 509998/1000000 [4:32:42<2:55:21, 46.57it/s]global step 510000, trans_decision ep_re 1760.643416655758

{"global_step": 510000, "eval_re": [1705.904567608115, 2410.492420353914, 
406.4721408491257, 1982.732545114462, 2271.347542681852, 3721.5961147183884, 
2666.240665774438, 362.9653401283175, 607.0204941441386, 1471.6623351848314], 
"eval_len": [289, 405, 74, 368, 376, 622, 439, 66, 110, 253]}

 52%|█████▏    | 519998/1000000 [4:37:52<2:51:22, 46.68it/s]global step 520000, trans_decision ep_re 1302.6121584170128

{"global_step": 520000, "eval_re": [664.0094379814193, 524.4288477910937, 
785.5956357315852, 1637.1281752312389, 125.45031918975381, 2080.1088732963126, 
506.49831452738493, 4442.375242681178, 2062.9957407011752, 197.53099703898664], 
"eval_len": [122, 97, 147, 308, 24, 392, 92, 766, 359, 38]}

 53%|█████▎    | 529998/1000000 [4:42:38<2:48:01, 46.62it/s]global step 530000, trans_decision ep_re 895.3077451777177

{"global_step": 530000, "eval_re": [547.010164036543, 532.7511764818328, 
203.0734358194485, 841.8027556845219, 916.1965607536855, 1306.335087269831, 
984.0552924912953, 1096.5140861427656, 1697.083184771983, 828.2557083252699], 
"eval_len": [100, 96, 39, 151, 167, 232, 177, 189, 300, 152]}

 54%|█████▍    | 539999/1000000 [4:47:34<2:46:23, 46.08it/s]global step 540000, trans_decision ep_re 867.989839858879

{"global_step": 540000, "eval_re": [167.91994334643988, 182.12977731622703, 
1577.6977909622071, 114.75494171785833, 141.53725896329465, 3271.2890006534963, 
192.49043606102677, 944.4873579447493, 542.9694065838861, 1544.6224850396052], 
"eval_len": [32, 35, 280, 22, 27, 573, 37, 170, 101, 274]}

 55%|█████▍    | 549997/1000000 [4:52:42<2:43:19, 45.92it/s]global step 550000, trans_decision ep_re 1316.6764598440943

{"global_step": 550000, "eval_re": [1127.4048208473528, 2055.330183928712, 
2288.058122397101, 2547.973397045142, 1074.220830522647, 1490.6226178199843, 
1562.3081000780562, 171.6234442363015, 167.91348997448048, 681.309591591165], 
"eval_len": [194, 347, 388, 423, 183, 262, 266, 33, 32, 121]}

 56%|█████▌    | 559995/1000000 [4:57:42<2:53:13, 42.33it/s]global step 560000, trans_decision ep_re 1375.7849088831954

{"global_step": 560000, "eval_re": [2549.2035360256154, 804.3737660954597, 
1857.6996453592922, 688.4317686000583, 1175.4234363272194, 955.0940175023065, 
1068.043864314371, 2098.8752638299543, 1844.3216654081766, 716.3821253694979], 
"eval_len": [437, 140, 315, 125, 203, 170, 204, 359, 319, 141]}

 57%|█████▋    | 569997/1000000 [5:02:32<2:34:10, 46.48it/s]global step 570000, trans_decision ep_re 1514.6675146264438

{"global_step": 570000, "eval_re": [2930.8899700944335, 1238.5721895178726, 
1440.5069944168895, 787.6978974138765, 3698.821454828723, 925.5872187531824, 
2265.9202402323845, 601.8708011094843, 355.95853666151066, 900.8498432360826], 
"eval_len": [517, 225, 261, 162, 649, 166, 404, 110, 64, 169]}

 58%|█████▊    | 579995/1000000 [5:07:28<2:30:29, 46.51it/s]global step 580000, trans_decision ep_re 792.0267188858666

{"global_step": 580000, "eval_re": [151.88164240702955, 690.5241084906871, 
703.0126887506016, 1453.3083614562368, 440.5126017209268, 872.7306392586347, 
777.0591232191906, 692.3472369958881, 816.749694713417, 1322.1410918460542], 
"eval_len": [29, 126, 127, 255, 82, 157, 136, 130, 154, 218]}

 59%|█████▉    | 589998/1000000 [5:12:32<2:25:13, 47.06it/s]global step 590000, trans_decision ep_re 2295.0095941071168

{"global_step": 590000, "eval_re": [751.4169188256485, 5811.124698820875, 
1335.0821452704909, 1042.9827999760862, 2261.76477908296, 910.4088020418455, 
1986.8769694172056, 1473.084620286552, 6078.867950256202, 1298.486257093302], 
"eval_len": [132, 962, 247, 183, 377, 157, 332, 250, 1000, 225]}

 60%|█████▉    | 599999/1000000 [5:17:32<2:23:39, 46.41it/s]global step 600000, trans_decision ep_re 1601.2262104625297

{"global_step": 600000, "eval_re": [173.22950641074908, 1541.0669578329187, 
4970.155923149277, 348.4656070830919, 1216.0700776171973, 265.00848307782627, 
1537.0390319788341, 156.8825033045181, 1252.5788667125953, 4551.76514745829], 
"eval_len": [33, 265, 827, 65, 242, 48, 267, 30, 220, 783]}

 61%|██████    | 609999/1000000 [5:22:15<2:20:00, 46.42it/s]global step 610000, trans_decision ep_re 1468.9950128359637

{"global_step": 610000, "eval_re": [954.294684750644, 1626.5955166260592, 
1206.885306198624, 751.1120213192232, 2266.0019680932955, 1818.4524779984158, 
2193.8893604843347, 1592.1801478723287, 1857.5694020968942, 422.96924291981657],
"eval_len": [171, 276, 208, 136, 382, 306, 372, 276, 313, 84]}

 62%|██████▏   | 619999/1000000 [5:27:22<2:16:14, 46.48it/s]global step 620000, trans_decision ep_re 1556.0432493496041

{"global_step": 620000, "eval_re": [1208.0848088734235, 1188.2695461972723, 
768.5972361045165, 2304.052562566864, 4007.073466497008, 1386.5493117636186, 
860.472791966852, 1983.7127592571258, 685.6659507247625, 1167.9540595445994], 
"eval_len": [211, 222, 133, 381, 675, 239, 151, 339, 121, 205]}

 63%|██████▎   | 629999/1000000 [5:32:07<4:40:34, 21.98it/s]global step 630000, trans_decision ep_re 1379.8097604012232

{"global_step": 630000, "eval_re": [1351.8045020754596, 395.9717450496404, 
605.5076992556229, 937.9246831330734, 1344.3108947554617, 1688.357471483537, 
5157.536758905458, 462.26298124251997, 1016.842443557676, 837.5784245537849], 
"eval_len": [231, 80, 109, 168, 227, 282, 862, 88, 182, 159]}

 64%|██████▍   | 639997/1000000 [5:37:13<2:09:30, 46.33it/s]global step 640000, trans_decision ep_re 1288.4041442149557

{"global_step": 640000, "eval_re": [600.970841110069, 1963.8254387129712, 
290.6767085472926, 2017.63749105524, 2267.013626452069, 2143.0874124216357, 
1659.4046001808513, 1474.4994954551717, 193.06182165032254, 273.8640065639331], 
"eval_len": [123, 324, 53, 333, 378, 363, 269, 246, 37, 50]}

 65%|██████▍   | 649999/1000000 [5:41:55<2:05:07, 46.62it/s]global step 650000, trans_decision ep_re 1255.708306824931

{"global_step": 650000, "eval_re": [168.84156977440063, 1077.4620215276243, 
520.0382151931609, 3905.1502778579716, 657.8024745867235, 1025.349253407203, 
1277.4494403832502, 2568.493117429912, 755.9275572739832, 600.5691408150813], 
"eval_len": [32, 192, 94, 682, 121, 183, 242, 455, 133, 107]}

 66%|██████▌   | 659998/1000000 [5:47:03<2:00:38, 46.97it/s]global step 660000, trans_decision ep_re 1133.2349036707897

{"global_step": 660000, "eval_re": [1429.9287370663694, 910.2665277759962, 
511.3826797357502, 700.2061575151039, 229.60616318906582, 532.3606934257082, 
5229.91655508302, 541.9706299125942, 799.0590560700603, 447.65183693423046], 
"eval_len": [251, 168, 95, 130, 44, 101, 920, 100, 148, 83]}

 67%|██████▋   | 669998/1000000 [5:51:44<1:56:44, 47.11it/s]global step 670000, trans_decision ep_re 1578.2277024533832

{"global_step": 670000, "eval_re": [2632.0753320664007, 856.2714251249918, 
4854.857812498006, 1337.1859241045436, 300.155243313028, 120.4431870287299, 
444.0384585821871, 2352.6739589747467, 1637.7142364641502, 1246.861446377052], 
"eval_len": [453, 154, 844, 261, 54, 23, 80, 401, 287, 220]}

 68%|██████▊   | 679997/1000000 [5:56:53<1:54:29, 46.58it/s]global step 680000, trans_decision ep_re 1740.282749811518

{"global_step": 680000, "eval_re": [2644.771616923079, 2354.6768402761077, 
162.00876656740667, 873.5172392414852, 2374.322194240869, 541.5416743875988, 
875.0980779333717, 2127.386749206853, 635.3612860659728, 4814.143053272436], 
"eval_len": [472, 420, 31, 157, 417, 100, 156, 386, 117, 846]}

 69%|██████▉   | 689996/1000000 [6:01:36<1:49:04, 47.37it/s]global step 690000, trans_decision ep_re 1569.3625650872443

{"global_step": 690000, "eval_re": [551.8310183100949, 156.91614567577014, 
1390.4775584237389, 2389.9442032225065, 1930.769520575089, 2081.3691422718207, 
3024.7071256075856, 631.8534834693363, 917.619634832251, 2618.1378184842506], 
"eval_len": [99, 30, 237, 399, 332, 349, 493, 114, 161, 433]}

 70%|██████▉   | 699998/1000000 [6:06:43<1:45:37, 47.34it/s]global step 700000, trans_decision ep_re 2658.47510806009

{"global_step": 700000, "eval_re": [3280.6870763150505, 803.6353152258492, 
5929.073796940098, 3630.499002642774, 250.0494027510999, 5049.565097545235, 
4539.6928404058, 378.9306260890076, 197.6466396908972, 2524.9712829950895], 
"eval_len": [561, 145, 1000, 635, 47, 862, 783, 70, 38, 435]}

 71%|███████   | 709998/1000000 [6:11:43<1:42:37, 47.10it/s]global step 710000, trans_decision ep_re 1149.258600674923

{"global_step": 710000, "eval_re": [304.2612259216565, 4952.2945361916945, 
345.4391704557654, 293.625022286779, 2961.9716921699574, 579.2265069870537, 
728.601232087927, 162.46641112751584, 178.9616126942533, 985.7385968266294], 
"eval_len": [58, 845, 63, 53, 508, 113, 132, 31, 34, 174]}

 72%|███████▏  | 719998/1000000 [6:16:25<1:38:58, 47.15it/s]global step 720000, trans_decision ep_re 1061.0340895789934

{"global_step": 720000, "eval_re": [537.9627509377452, 151.34691396684875, 
161.9778963826085, 2377.4498797559086, 657.5106777758307, 1027.5668997826929, 
669.4583591029873, 1165.149539267349, 2227.9828705361742, 1633.935108281789], 
"eval_len": [97, 29, 31, 413, 117, 181, 119, 201, 368, 277]}

 73%|███████▎  | 729999/1000000 [6:21:33<1:36:44, 46.52it/s]global step 730000, trans_decision ep_re 1871.3643218206023

{"global_step": 730000, "eval_re": [2667.259105691635, 1014.073263777575, 
691.8890942503488, 3572.8787392340855, 265.69134611602436, 2371.4387964651787, 
3490.5615300350714, 2730.186720447607, 1192.613750459042, 717.0508717294592], 
"eval_len": [438, 172, 124, 584, 50, 395, 591, 452, 204, 140]}

 74%|███████▍  | 739996/1000000 [6:26:14<1:31:40, 47.27it/s]global step 740000, trans_decision ep_re 1020.6550355854391

{"global_step": 740000, "eval_re": [256.1547298029197, 2881.028518686721, 
462.87541465126054, 1795.3832281886464, 899.0587242481599, 1186.867544236324, 
239.68225430189926, 1022.3088877985418, 362.5742512664939, 1100.6168026734247], 
"eval_len": [48, 467, 83, 302, 162, 208, 47, 165, 72, 191]}

 75%|███████▍  | 749999/1000000 [6:31:06<1:29:23, 46.61it/s]global step 750000, trans_decision ep_re 1343.123210393115

{"global_step": 750000, "eval_re": [4551.9321520054245, 156.58909480581778, 
1437.2118876728864, 1325.59491756596, 1358.6279834401262, 809.6495316684011, 
1490.1177769443664, 1414.715708577304, 157.42520583515227, 729.3678454157123], 
"eval_len": [767, 30, 255, 238, 243, 148, 269, 257, 30, 132]}

 76%|███████▌  | 759998/1000000 [6:36:00<1:24:50, 47.15it/s]global step 760000, trans_decision ep_re 1072.4648644652639

{"global_step": 760000, "eval_re": [1085.6779614466693, 141.68978503673267, 
1254.2984867199232, 171.32197911715753, 152.7350489814863, 194.56609041167403, 
3122.712338914205, 1364.4842212414949, 918.4506270021701, 2318.7121057811255], 
"eval_len": [184, 27, 218, 33, 29, 37, 541, 234, 162, 399]}

 77%|███████▋  | 769996/1000000 [6:41:04<1:21:06, 47.26it/s]global step 770000, trans_decision ep_re 2184.0766806257907

{"global_step": 770000, "eval_re": [1367.3249876256818, 2006.4702931947786, 
1858.4566478565632, 2150.4141480964413, 684.0654587863801, 3646.8057618362946, 
624.1908250896508, 6250.5328050899925, 1818.78364118642, 1433.722237495707], 
"eval_len": [229, 332, 306, 355, 117, 585, 110, 1000, 302, 242]}

 78%|███████▊  | 779997/1000000 [6:46:04<1:19:14, 46.27it/s]global step 780000, trans_decision ep_re 1617.8293549903399

{"global_step": 780000, "eval_re": [458.8400306411616, 796.644885037433, 
5752.633468544063, 3129.780404722996, 951.5413174765847, 363.0111171171004, 
1477.3193754296997, 396.6084002686144, 193.6557273183371, 2658.2588233474066], 
"eval_len": [91, 138, 958, 520, 163, 66, 259, 71, 37, 440]}

 79%|███████▉  | 789998/1000000 [6:50:47<1:14:25, 47.03it/s]global step 790000, trans_decision ep_re 1727.7654317369409

{"global_step": 790000, "eval_re": [892.5865727979522, 448.27309732281304, 
2244.5239379575387, 4104.679350311292, 2087.725879584876, 2801.5978932894805, 
744.8353974084481, 1119.4209320237796, 2522.651508231522, 311.35974844170255], 
"eval_len": [158, 82, 376, 664, 350, 465, 132, 193, 412, 56]}

 80%|███████▉  | 799997/1000000 [6:55:54<1:11:41, 46.50it/s]global step 800000, trans_decision ep_re 1310.577506796292

{"global_step": 800000, "eval_re": [893.6888651648173, 621.5749406215368, 
1234.187125517466, 191.06792334850752, 490.9498475798398, 3684.3519295067194, 
277.7371133367166, 2008.778776223827, 2560.108036667541, 1143.330509995949], 
"eval_len": [166, 113, 214, 36, 90, 625, 54, 343, 439, 194]}

 81%|████████  | 809998/1000000 [7:00:54<1:06:56, 47.30it/s]global step 810000, trans_decision ep_re 1808.201104617463

{"global_step": 810000, "eval_re": [1425.4832780352065, 3212.1582374701893, 
156.48317483894468, 907.5167611282839, 357.0597653119133, 1200.5407477656504, 
5665.266134056048, 679.6835358202604, 3430.5418750532017, 1047.277536694932], 
"eval_len": [255, 564, 30, 159, 69, 214, 995, 123, 602, 190]}

 82%|████████▏ | 819999/1000000 [7:05:34<1:05:44, 45.64it/s]global step 820000, trans_decision ep_re 1717.3300877966813

{"global_step": 820000, "eval_re": [417.8779046642466, 1010.4859093904208, 
1635.2311865965394, 3044.1481376592133, 1862.1938087816145, 130.9321934442878, 
1258.137846770052, 4970.171619978475, 1957.8587050911774, 886.2635655907825], 
"eval_len": [74, 180, 283, 525, 318, 25, 220, 832, 340, 148]}

 83%|████████▎ | 829996/1000000 [7:10:44<1:00:17, 46.99it/s]global step 830000, trans_decision ep_re 1415.6845232896685

{"global_step": 830000, "eval_re": [3115.572724003659, 197.60944096956183, 
1326.0678328405395, 453.7098977318181, 676.783714307993, 2469.709911795621, 
494.57267777852576, 2400.08125933695, 498.2178558092059, 2524.5199183228115], 
"eval_len": [546, 38, 232, 83, 126, 436, 92, 426, 87, 447]}

 84%|████████▍ | 839997/1000000 [7:15:25<57:21, 46.49it/s]global step 840000, trans_decision ep_re 1581.046134478116

{"global_step": 840000, "eval_re": [4096.608792010059, 3181.8442445219143, 
653.1002324507879, 767.2803644808785, 1701.7095499922511, 186.36359001845355, 
136.1251856092614, 2751.2236647969353, 1990.5154110651104, 345.6903098355091], 
"eval_len": [679, 531, 117, 147, 291, 36, 26, 459, 354, 65]}

 85%|████████▍ | 849999/1000000 [7:20:34<53:55, 46.36it/s]global step 850000, trans_decision ep_re 2613.7464080251657

{"global_step": 850000, "eval_re": [3985.0367475402772, 1945.5863349947083, 
1479.4566681581632, 3061.475223306023, 3584.227772812651, 1439.6279650925999, 
3332.386049070047, 136.26539121857633, 2702.8578707644224, 4470.544057294186], 
"eval_len": [664, 326, 259, 510, 606, 252, 557, 26, 448, 740]}

 86%|████████▌ | 859998/1000000 [7:25:34<49:28, 47.17it/s]global step 860000, trans_decision ep_re 1218.9072648899219

{"global_step": 860000, "eval_re": [1551.730274572733, 181.51083431834232, 
152.29329324619545, 178.01089055232427, 1820.6891480859842, 1088.7413492518651, 
3896.3991931768483, 2489.8832038660917, 355.5087873819495, 474.3056744468846], 
"eval_len": [273, 35, 29, 34, 322, 195, 685, 432, 67, 88]}

 87%|████████▋ | 869996/1000000 [7:30:17<46:35, 46.51it/s]global step 870000, trans_decision ep_re 1037.5622360336563

{"global_step": 870000, "eval_re": [172.33039806176166, 1764.938128336869, 
1258.7385105750334, 634.9023611633527, 642.5144120705521, 624.2727828421955, 
4010.5150116301893, 643.588042956702, 156.6157503518328, 467.20696234807303], 
"eval_len": [33, 290, 217, 114, 111, 113, 636, 117, 30, 83]}

 88%|████████▊ | 879997/1000000 [7:35:24<43:25, 46.05it/s]global step 880000, trans_decision ep_re 1011.7677568945116

{"global_step": 880000, "eval_re": [183.74110542708894, 801.354790511285, 
4146.803454822895, 135.4759960652525, 162.76348141218457, 135.48187927585374, 
406.16943193728156, 2481.75666090895, 600.1906941979489, 1063.9400743863755], 
"eval_len": [35, 141, 699, 26, 31, 26, 76, 421, 108, 183]}

 89%|████████▉ | 889995/1000000 [7:40:08<39:47, 46.07it/s]global step 890000, trans_decision ep_re 1570.4481498350865

{"global_step": 890000, "eval_re": [166.63520330367643, 2523.3889808408376, 
5076.350795294404, 1744.89579942074, 547.7522186919422, 1927.1732360098777, 
226.8893513892903, 405.5389458406252, 624.216006176912, 2461.640961382561], 
"eval_len": [32, 427, 849, 290, 98, 337, 43, 76, 112, 409]}

 90%|████████▉ | 899996/1000000 [7:45:24<35:40, 46.73it/s]global step 900000, trans_decision ep_re 2345.6255728019723

{"global_step": 900000, "eval_re": [393.18011753990925, 3620.024482092045, 
1549.215855609715, 6122.638938070741, 6058.568466168927, 130.6765102306435, 
182.31697914222048, 1445.8492678310884, 1771.9410544118641, 2181.8440569225686],
"eval_len": [72, 569, 261, 1000, 1000, 25, 35, 244, 287, 363]}

 91%|█████████ | 909996/1000000 [7:50:24<32:08, 46.67it/s]global step 910000, trans_decision ep_re 1749.4440372927834

{"global_step": 910000, "eval_re": [1307.7987920404785, 324.80310999832045, 
3693.0868555167367, 5031.6826999030645, 333.6559844626268, 342.33379727660014, 
458.466306767702, 345.06276960700285, 3230.2456891955326, 2427.3043681597687], 
"eval_len": [219, 59, 620, 812, 60, 65, 82, 64, 536, 403]}

 92%|█████████▏| 919996/1000000 [7:55:24<28:36, 46.61it/s]global step 920000, trans_decision ep_re 1844.0763168848196

{"global_step": 920000, "eval_re": [162.4083959056817, 198.8156693099047, 
3605.442478429476, 1599.1410271879345, 1040.3571513592942, 3137.9787855033364, 
1570.8612211245943, 1042.0204989785623, 2078.3425754494706, 4005.395365599939], 
"eval_len": [31, 38, 574, 264, 169, 505, 247, 173, 334, 630]}

 93%|█████████▎| 929999/1000000 [8:00:08<25:22, 45.99it/s]global step 930000, trans_decision ep_re 1101.0985934187852

{"global_step": 930000, "eval_re": [1700.1185653775656, 664.6468116167107, 
809.0080259610312, 243.69508042339203, 140.88303631067882, 3731.457261261738, 
351.16696238292786, 3066.762835765773, 141.14415309215948, 162.10320199587318], 
"eval_len": [281, 117, 141, 46, 27, 613, 65, 510, 27, 31]}

 94%|█████████▍| 939995/1000000 [8:05:04<21:37, 46.24it/s]global step 940000, trans_decision ep_re 1568.9770791527935

{"global_step": 940000, "eval_re": [786.5769708541837, 2525.1636503435675, 
530.3521141770473, 1097.1806903748231, 726.5451805459608, 151.90734932184947, 
4940.65235601591, 1328.4697965388402, 3461.5118505520863, 141.41083280366638], 
"eval_len": [141, 428, 96, 194, 131, 29, 844, 236, 591, 27]}

 95%|█████████▍| 949995/1000000 [8:10:14<18:04, 46.11it/s]global step 950000, trans_decision ep_re 1281.1378730775937

{"global_step": 950000, "eval_re": [136.17818815536947, 1375.201941379628, 
357.1757277862743, 414.82560162776133, 2602.8894450943767, 1899.426317642765, 
3276.7899976189333, 1410.9780920363758, 1185.9109940680898, 152.00242536636569],
"eval_len": [26, 235, 67, 76, 421, 320, 541, 229, 207, 29]}

 96%|█████████▌| 959996/1000000 [8:15:14<14:16, 46.73it/s]global step 960000, trans_decision ep_re 1917.3961464972174

{"global_step": 960000, "eval_re": [998.3076490584075, 2451.5702793754267, 
340.23515725231755, 6578.456365282538, 6398.264658046735, 297.6799576902956, 
1146.2443983880446, 660.0695579425437, 141.05841337052624, 162.07502856533594], 
"eval_len": [162, 385, 61, 1000, 957, 53, 191, 113, 27, 31]}

 97%|█████████▋| 969995/1000000 [8:20:14<10:51, 46.03it/s]global step 970000, trans_decision ep_re 1921.8457311694522

{"global_step": 970000, "eval_re": [3022.382527151601, 921.5986297576762, 
940.8224024662803, 4946.522799670447, 727.7712585007622, 3806.7654031676743, 
1334.9117605182028, 161.52105761494485, 2496.498986482364, 859.6624863645679], 
"eval_len": [502, 152, 155, 807, 124, 625, 218, 31, 430, 146]}

 98%|█████████▊| 979995/1000000 [8:25:09<07:22, 45.20it/s]global step 980000, trans_decision ep_re 1219.5415727762224

{"global_step": 980000, "eval_re": [779.8925339380962, 763.3835010914637, 
807.4081212945034, 2095.190982236367, 146.2053631575563, 2684.290336966559, 
188.4996304992735, 662.3829756087581, 1201.9167513539833, 2866.2455316156656], 
"eval_len": [130, 131, 134, 341, 28, 428, 36, 123, 197, 452]}

 99%|█████████▉| 989996/1000000 [8:30:09<03:32, 46.99it/s]global step 990000, trans_decision ep_re 1815.8360302472108

{"global_step": 990000, "eval_re": [173.75837435862158, 4142.041600280386, 
897.3060290018319, 689.7151841911959, 3285.5842308176157, 3962.0381100237255, 
1065.5903205396528, 212.73318643206116, 890.0449639098549, 2839.5483029171623], 
"eval_len": [33, 635, 151, 115, 497, 605, 171, 41, 146, 435]}

100%|█████████▉| 999998/1000000 [8:35:26<00:00, 46.94it/s]global step 1000000, trans_decision ep_re 1536.8948488342826

{"global_step": 1000000, "eval_re": [2360.805686021351, 2921.96424620687, 
338.6149198498703, 1162.0071780161927, 6348.200151940016, 604.5624344347076, 
679.9598226903129, 381.94787300032203, 419.6053181859874, 151.28085799719284], 
"eval_len": [370, 455, 61, 185, 974, 102, 115, 66, 75, 29]}

100%|██████████| 1000000/1000000 [8:35:28<00:00, 32.33it/s]
