
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 4,
    'delayspec': 'ExtremeSparseL4U32::markov(4, 32, [[249, 1], [1, 31]])'
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [02:51<6:06:19, 45.04it/s]global step 10000, trans_decision ep_re 329.2855387815506

{"global_step": 10000, "eval_re": [347.1796386216297, 361.6918514025437, 
332.73767813254847, 336.15273394243076, 297.421506021836, 313.9780377507973, 
342.0087259765261, 343.90920867831244, 297.6576378127867, 320.11836947609464], 
"eval_len": [63, 66, 60, 61, 55, 57, 62, 62, 54, 58]}

  2%|▏         | 19997/1000000 [08:40<8:15:47, 32.94it/s]global step 20000, trans_decision ep_re 472.44240433264275

{"global_step": 20000, "eval_re": [299.76125069533987, 161.81340396870544, 
550.1978687149259, 442.0479916097361, 733.1787886641895, 448.4623263462583, 
556.9746143444303, 314.4414083889034, 541.3930358676984, 676.1533547262409], 
"eval_len": [55, 31, 112, 89, 139, 91, 104, 58, 101, 128]}

  3%|▎         | 29995/1000000 [14:27<6:10:56, 43.58it/s]global step 30000, trans_decision ep_re 433.12222013818973

{"global_step": 30000, "eval_re": [356.75741862397456, 311.79240512499064, 
487.4456806284141, 491.7853008521906, 620.5610387368777, 453.79802510111733, 
332.4951300012152, 380.03986933768357, 522.737361929773, 373.80997104565995], 
"eval_len": [65, 58, 104, 96, 116, 86, 61, 70, 95, 68]}

  4%|▍         | 39999/1000000 [20:07<6:09:17, 43.33it/s]global step 40000, trans_decision ep_re 432.00730565402836

{"global_step": 40000, "eval_re": [404.9236855348692, 574.3969295312028, 
593.7510332525212, 430.7625151923276, 439.34822653704794, 320.96682397404874, 
430.6738857435206, 379.3993776601625, 344.37642375083976, 401.47415536374393], 
"eval_len": [78, 107, 111, 79, 81, 63, 80, 76, 67, 74]}

  5%|▍         | 49996/1000000 [25:41<5:47:55, 45.51it/s]global step 50000, trans_decision ep_re 552.7344410691702

{"global_step": 50000, "eval_re": [535.5124103346208, 708.0897432285042, 
721.2299048375187, 624.9687708619026, 442.5329427686807, 506.639448250631, 
465.7608651926966, 469.8416258261861, 671.2511666137452, 381.51753277721645], 
"eval_len": [110, 145, 137, 130, 83, 106, 94, 100, 123, 75]}

  6%|▌         | 59996/1000000 [31:21<5:42:05, 45.80it/s]global step 60000, trans_decision ep_re 592.4280638109769

{"global_step": 60000, "eval_re": [411.642325935325, 334.78355059371336, 
664.2034438739271, 805.5524189061388, 409.4282749438372, 580.5696782121571, 
818.5300303104069, 689.5880141139274, 605.3112751371242, 604.6716260832128], 
"eval_len": [75, 69, 126, 161, 75, 105, 157, 138, 113, 114]}

  7%|▋         | 69998/1000000 [36:27<5:36:50, 46.02it/s]global step 70000, trans_decision ep_re 443.18508552986975

{"global_step": 70000, "eval_re": [513.9221644799269, 415.502487297985, 
536.1035063230997, 353.72378785494163, 156.25581420573891, 424.648972847611, 
549.9349466811951, 409.13062574625167, 529.5269862202387, 543.10156364171], 
"eval_len": [95, 77, 97, 64, 30, 78, 101, 76, 97, 98]}

  8%|▊         | 79998/1000000 [41:28<5:31:23, 46.27it/s]global step 80000, trans_decision ep_re 399.98834871865836

{"global_step": 80000, "eval_re": [390.3414612188658, 408.89966762426417, 
405.18487968177993, 413.84488164907793, 509.2925662770752, 385.0790256558509, 
194.22001307039707, 505.1070460201615, 393.90711780130925, 394.0068281878016], 
"eval_len": [73, 77, 73, 75, 95, 71, 38, 94, 72, 73]}

  9%|▉         | 89998/1000000 [46:33<5:29:41, 46.00it/s]global step 90000, trans_decision ep_re 542.586789986683

{"global_step": 90000, "eval_re": [684.3465077783437, 591.9580381746969, 
706.1034131295407, 487.78059753045596, 183.00862238552202, 633.2615488293626, 
172.56993026064455, 777.0100497293507, 708.1942919681704, 481.63490008074183], 
"eval_len": [125, 106, 127, 89, 35, 113, 33, 146, 128, 90]}

 10%|▉         | 99998/1000000 [51:42<5:26:31, 45.94it/s]global step 100000, trans_decision ep_re 414.3264776389989

{"global_step": 100000, "eval_re": [667.5952615995504, 507.7840040011535, 
401.8417761131541, 135.82062045719118, 501.12808285914616, 845.3154473464454, 
166.34559419097252, 166.645843260991, 252.61041588088415, 498.1777306805009], 
"eval_len": [126, 95, 77, 26, 94, 150, 32, 32, 49, 96]}

 11%|█         | 109998/1000000 [56:53<5:19:19, 46.45it/s]global step 110000, trans_decision ep_re 637.2072763077069

{"global_step": 110000, "eval_re": [1013.8318233270587, 439.9343867401629, 
1168.5130435824012, 805.4594746066887, 711.3050384887281, 777.1540030771206, 
729.24691026796, 299.5561354800388, 224.48491007682705, 202.5870374300835], 
"eval_len": [201, 82, 234, 147, 134, 141, 149, 58, 43, 39]}

 12%|█▏        | 119995/1000000 [1:01:48<5:24:22, 45.22it/s]global step 120000, trans_decision ep_re 483.7991637998108

{"global_step": 120000, "eval_re": [995.6954825470235, 342.42048389997103, 
299.08403111896456, 176.6798649139545, 403.25860464970543, 495.12026478904045, 
519.9468881533778, 387.1418579848326, 721.8354262420687, 496.80873369916947], 
"eval_len": [189, 67, 59, 34, 73, 92, 97, 70, 133, 94]}

 13%|█▎        | 129995/1000000 [1:07:02<5:18:10, 45.57it/s]global step 130000, trans_decision ep_re 326.939207858155

{"global_step": 130000, "eval_re": [512.2569463565867, 81.17053573697027, 
306.16785772846856, 576.03567192673, 408.1413627769344, 430.8351752759729, 
506.2969524637365, 125.44705084245064, 156.42807250452628, 166.61245296917323], 
"eval_len": [94, 17, 57, 105, 77, 81, 94, 24, 30, 32]}

 14%|█▍        | 139999/1000000 [1:12:18<5:14:44, 45.54it/s]global step 140000, trans_decision ep_re 503.0989532032556

{"global_step": 140000, "eval_re": [448.29456626983256, 807.3387856086648, 
439.1237240388244, 465.84747157713656, 243.1274684143221, 488.4013397269172, 
420.3431344487823, 656.5191507389301, 563.4614603138536, 498.5324308952924], 
"eval_len": [84, 151, 81, 88, 46, 89, 78, 128, 106, 92]}

 15%|█▍        | 149999/1000000 [1:17:33<5:11:11, 45.52it/s]global step 150000, trans_decision ep_re 447.99570964820924

{"global_step": 150000, "eval_re": [549.4455930505777, 578.8323034533304, 
437.4131725452003, 562.6245062264619, 673.3748031565142, 278.874000556216, 
156.8683910320416, 212.79089420994794, 477.20419366449687, 552.5292385873049], 
"eval_len": [101, 107, 80, 104, 129, 55, 30, 41, 89, 109]}

 16%|█▌        | 159999/1000000 [1:22:45<5:07:17, 45.56it/s]global step 160000, trans_decision ep_re 527.4174802889382

{"global_step": 160000, "eval_re": [766.1650353206295, 432.69891578722235, 
457.5589510270518, 522.0903568757914, 709.4312601550449, 370.9754505521572, 
448.14300353675304, 471.42598137658683, 661.675621787024, 434.0102264711214], 
"eval_len": [139, 79, 86, 95, 128, 71, 82, 86, 121, 80]}

 17%|█▋        | 169999/1000000 [1:27:51<6:07:30, 37.64it/s]global step 170000, trans_decision ep_re 556.7927400006971

{"global_step": 170000, "eval_re": [870.776082837396, 80.3642816063855, 
288.1646993296302, 533.0055735102039, 896.1225027372396, 290.7325945142574, 
823.3152882384775, 376.6100681875704, 1004.1486187787135, 404.6876902670976], 
"eval_len": [165, 17, 56, 101, 176, 54, 154, 74, 185, 75]}

 18%|█▊        | 179999/1000000 [1:33:03<5:03:59, 44.96it/s]global step 180000, trans_decision ep_re 569.5234349740565

{"global_step": 180000, "eval_re": [162.0234022688471, 953.651027396152, 
167.41802201109263, 418.72434630564226, 980.0167404781306, 558.8418917475968, 
818.7831825470321, 782.6052905985092, 416.88552297029554, 436.2849234172661], 
"eval_len": [31, 195, 32, 81, 185, 107, 153, 153, 79, 85]}

 19%|█▉        | 189999/1000000 [1:38:17<4:58:39, 45.20it/s]global step 190000, trans_decision ep_re 587.5625329027604

{"global_step": 190000, "eval_re": [549.3679252956632, 583.0385408993535, 
563.4445843592362, 784.8719678940975, 637.7221148589423, 307.9882115703045, 
426.2500738919513, 689.2150447317038, 767.8494382631607, 565.8774272631898], 
"eval_len": [114, 122, 113, 162, 122, 58, 79, 145, 152, 119]}

 20%|█▉        | 199999/1000000 [1:43:28<5:33:21, 40.00it/s]global step 200000, trans_decision ep_re 643.1651696938759

{"global_step": 200000, "eval_re": [838.3129566903907, 861.436383840603, 
805.983090584293, 693.3735898074733, 772.2991298917756, 282.797666510718, 
607.1742626466495, 957.5910908338356, 270.18077047752274, 342.50275565549913], 
"eval_len": [157, 170, 153, 135, 158, 51, 118, 195, 51, 63]}

 21%|██        | 209999/1000000 [1:48:40<4:50:30, 45.32it/s]global step 210000, trans_decision ep_re 506.96318999676697

{"global_step": 210000, "eval_re": [340.8244655623994, 157.28623917469477, 
189.63670318561108, 1416.8571868152105, 169.4156666643799, 348.33047842153536, 
579.7779002575217, 206.75806309652478, 978.1761567851985, 682.5690400045938], 
"eval_len": [67, 30, 38, 264, 36, 63, 110, 41, 189, 130]}

 22%|██▏       | 219998/1000000 [1:53:42<4:43:03, 45.93it/s]global step 220000, trans_decision ep_re 827.5279889275323

{"global_step": 220000, "eval_re": [369.5873613435896, 916.7302045542689, 
696.3818726898099, 617.1011146135725, 903.5567317257558, 535.8053453330816, 
1203.346614573204, 891.1328875190999, 1145.801765319655, 995.8359916032875], 
"eval_len": [74, 174, 126, 117, 169, 96, 219, 168, 212, 182]}

 23%|██▎       | 229999/1000000 [1:59:06<4:56:56, 43.22it/s]global step 230000, trans_decision ep_re 878.2878806792793

{"global_step": 230000, "eval_re": [939.0250268938668, 192.7151044416967, 
975.5812023055629, 1167.2633024999086, 565.6960285876457, 665.5900582274364, 
638.3439368666152, 1229.042377383264, 911.2169795944711, 1498.4047899923248], 
"eval_len": [179, 37, 176, 226, 107, 130, 120, 226, 178, 279]}

 24%|██▍       | 239996/1000000 [2:04:55<4:47:58, 43.99it/s]global step 240000, trans_decision ep_re 1212.5664592731719

{"global_step": 240000, "eval_re": [766.9980413004216, 1146.7501594777575, 
827.0344494130819, 1630.0728426617675, 1363.0574447336023, 901.6078901648651, 
1686.179028390102, 1173.1598837356828, 1281.7406109421354, 1349.0642419123037], 
"eval_len": [147, 216, 160, 322, 266, 174, 327, 228, 265, 253]}

 25%|██▍       | 249998/1000000 [2:10:16<4:44:23, 43.95it/s]global step 250000, trans_decision ep_re 870.0575948749058

{"global_step": 250000, "eval_re": [1092.1376739978236, 1148.7316530712374, 
1026.0499181314924, 1237.0471890505746, 1009.0648568540404, 120.2360605298981, 
807.6265109065835, 1167.5679345613419, 924.3655133817576, 167.74863826430888], 
"eval_len": [207, 220, 192, 222, 187, 25, 153, 218, 177, 32]}

 26%|██▌       | 259996/1000000 [2:16:02<4:27:37, 46.08it/s]global step 260000, trans_decision ep_re 917.801597032684

{"global_step": 260000, "eval_re": [880.0439438534212, 1016.8528340793623, 
1023.0846482672582, 775.3629154016617, 951.3151669613895, 1220.9907164727701, 
1249.044536362357, 763.2155797740971, 183.30880298978826, 1114.7968261647345], 
"eval_len": [168, 199, 193, 148, 184, 229, 234, 145, 35, 211]}

 27%|██▋       | 269999/1000000 [2:21:20<4:26:03, 45.73it/s]global step 270000, trans_decision ep_re 943.0453883664555

{"global_step": 270000, "eval_re": [924.5280797646598, 1306.9066165411894, 
685.0122530816373, 1044.853899453346, 935.0136626065232, 625.1448050578031, 
792.4888358206791, 992.327938054745, 708.2582874888805, 1415.9195057950915], 
"eval_len": [176, 238, 131, 198, 180, 116, 148, 183, 144, 265]}

 28%|██▊       | 279997/1000000 [2:26:51<4:36:45, 43.36it/s]global step 280000, trans_decision ep_re 809.344575467186

{"global_step": 280000, "eval_re": [633.7572518042081, 156.54333000026062, 
688.3031441378707, 937.6903759973461, 376.34958484855156, 1782.2069219455964, 
1083.6035747216029, 962.0582976550261, 177.82875640551197, 1295.1045171558865], 
"eval_len": [120, 30, 124, 181, 71, 350, 201, 182, 34, 237]}

 29%|██▉       | 289996/1000000 [2:32:15<4:16:41, 46.10it/s]global step 290000, trans_decision ep_re 1093.4117699774197

{"global_step": 290000, "eval_re": [390.80668070502344, 148.01381339084514, 
703.5317458366632, 2749.112839844436, 532.1124313316889, 676.3106340360765, 
1011.0618839606689, 2152.3458381935193, 1398.0022823821082, 1172.8195500931647],
"eval_len": [70, 30, 134, 510, 100, 130, 189, 395, 274, 220]}

 30%|██▉       | 299995/1000000 [2:37:16<4:14:40, 45.81it/s]global step 300000, trans_decision ep_re 776.4955857932171

{"global_step": 300000, "eval_re": [182.71169470080412, 625.0105866016801, 
151.36858166623125, 648.1987149991922, 909.7054980197327, 814.5430152849817, 
1483.6346324469228, 1302.2278892813529, 368.2087246401167, 1279.3465202911555], 
"eval_len": [35, 118, 29, 120, 173, 159, 289, 248, 71, 251]}

 31%|███       | 309996/1000000 [2:42:29<4:09:24, 46.11it/s]global step 310000, trans_decision ep_re 956.325219034575

{"global_step": 310000, "eval_re": [1222.3095429073235, 564.3047244116449, 
161.72597586471065, 640.8189983040006, 420.1991792898087, 1119.5163338159266, 
1156.4062908396622, 1600.5719001867064, 929.1598996750978, 1748.2393450508678], 
"eval_len": [232, 106, 31, 121, 77, 215, 228, 302, 171, 351]}

 32%|███▏      | 319996/1000000 [2:47:37<4:27:46, 42.33it/s]global step 320000, trans_decision ep_re 1229.0775272717647

{"global_step": 320000, "eval_re": [3187.1426257501284, 125.29736127744839, 
1296.3280071779166, 1505.3698667502974, 449.54583756136515, 2713.0807051042543, 
135.82111212332, 192.4875163530956, 1824.3014426469383, 861.4007979728833], 
"eval_len": [585, 24, 242, 290, 83, 506, 26, 37, 336, 169]}

 33%|███▎      | 329995/1000000 [2:53:46<4:17:07, 43.43it/s]global step 330000, trans_decision ep_re 1626.288667500918

{"global_step": 330000, "eval_re": [406.17706692459626, 3239.999168233254, 
622.9264153752969, 1960.1446154327489, 214.41987841736045, 1995.4096863880177, 
2228.9565229687973, 2887.9933617736356, 740.7921637883958, 1966.0677957070789], 
"eval_len": [74, 644, 119, 370, 41, 383, 419, 542, 139, 367]}

 34%|███▍      | 339995/1000000 [2:59:26<4:02:46, 45.31it/s]global step 340000, trans_decision ep_re 1603.6810111919194

{"global_step": 340000, "eval_re": [199.06988900363115, 130.71105669413166, 
1260.7160237196774, 3655.4993727522065, 1985.7095185930195, 1458.805192683475, 
1547.2079662513208, 418.9104667458009, 1123.0154788532286, 4257.165146622701], 
"eval_len": [38, 25, 239, 693, 373, 285, 300, 79, 228, 802]}

 35%|███▍      | 349998/1000000 [3:05:06<4:06:09, 44.01it/s]global step 350000, trans_decision ep_re 1563.9798870137165

{"global_step": 350000, "eval_re": [393.8893136494215, 4831.645184571167, 
1775.1588770624085, 1645.5127070203812, 927.1210274319377, 1955.0185913038126, 
527.4289229878455, 2458.640251621621, 223.98644858679646, 901.3975459017721], 
"eval_len": [78, 911, 336, 308, 178, 376, 111, 455, 44, 168]}

 36%|███▌      | 359997/1000000 [3:10:36<3:53:57, 45.59it/s]global step 360000, trans_decision ep_re 1335.2520238197214

{"global_step": 360000, "eval_re": [2078.746275015754, 1788.0598206596292, 
886.9210768701778, 698.4113396669344, 162.409586870833, 303.7922090524515, 
1150.97430143373, 3294.142222758304, 2622.7851584029336, 366.27824746646496], 
"eval_len": [384, 322, 158, 129, 31, 57, 214, 595, 471, 66]}

 37%|███▋      | 369995/1000000 [3:15:27<3:51:01, 45.45it/s]global step 370000, trans_decision ep_re 1146.805011608778

{"global_step": 370000, "eval_re": [1005.7681078137463, 675.1399222503247, 
321.0643226694231, 879.7639858506456, 1065.2420882929127, 1134.9056520112063, 
1757.0058896088535, 2175.75924431329, 1271.0897436421724, 1182.3111596352055], 
"eval_len": [192, 126, 59, 176, 195, 215, 324, 401, 233, 229]}

 38%|███▊      | 379999/1000000 [3:20:47<3:47:37, 45.40it/s]global step 380000, trans_decision ep_re 1454.3820803449078

{"global_step": 380000, "eval_re": [156.82091198194453, 173.05259782845394, 
792.1862650510725, 1047.4281513450305, 1667.991267694197, 5135.752772679441, 
3028.497594098967, 867.6853192354417, 862.9719090600548, 811.4340144744754], 
"eval_len": [30, 33, 147, 191, 305, 922, 541, 159, 160, 150]}

 39%|███▉      | 389995/1000000 [3:25:37<3:51:04, 44.00it/s]global step 390000, trans_decision ep_re 1227.9649598513165

{"global_step": 390000, "eval_re": [162.65049897696468, 913.7907661053297, 
1537.431336621854, 1542.496540412668, 2023.384502135365, 192.5252520463332, 
156.47058969101957, 2101.3759944035914, 3423.336072946383, 226.18804517365652], 
"eval_len": [31, 188, 288, 291, 387, 37, 30, 403, 652, 43]}

 40%|███▉      | 399995/1000000 [3:30:57<3:41:11, 45.21it/s]global step 400000, trans_decision ep_re 1786.1686543819292

{"global_step": 400000, "eval_re": [2156.9002597789013, 257.4092147440666, 
1011.2789521948321, 557.8712282031576, 2644.0362886637054, 3800.4724253026598, 
1280.855460784288, 1634.3822286091543, 2413.854480967812, 2104.6260045707154], 
"eval_len": [386, 51, 181, 104, 469, 681, 241, 298, 431, 389]}

 41%|████      | 409995/1000000 [3:35:57<3:37:26, 45.22it/s]global step 410000, trans_decision ep_re 1197.6101582406372

{"global_step": 410000, "eval_re": [558.5278600633376, 5551.148883107008, 
1224.2545309316754, 272.15179488097795, 250.72686383351976, 279.128813843909, 
145.96764899776207, 3334.612450569664, 187.45338267105757, 172.12935350746164], 
"eval_len": [97, 1000, 221, 50, 49, 53, 28, 605, 36, 33]}

 42%|████▏     | 419996/1000000 [3:40:50<3:29:54, 46.05it/s]global step 420000, trans_decision ep_re 1103.1715287759957

{"global_step": 420000, "eval_re": [141.0966729102361, 2929.166920150197, 
822.8063943294115, 3069.0215458182433, 1872.0443284940934, 851.2361633339182, 
157.07496003994405, 187.46161862377528, 849.7694201295192, 152.03726393061854], 
"eval_len": [27, 526, 151, 545, 332, 156, 30, 36, 153, 29]}

 43%|████▎     | 429996/1000000 [3:45:52<3:26:27, 46.01it/s]global step 430000, trans_decision ep_re 1053.4633718614132

{"global_step": 430000, "eval_re": [829.8612190778391, 1376.0875007572813, 
152.29554122466104, 1158.0233523593545, 2037.5486667582481, 171.71410941769415, 
220.1124886323239, 601.2699203043499, 821.3223688584308, 3166.3985512239487], 
"eval_len": [152, 273, 29, 213, 372, 33, 42, 119, 148, 600]}

 44%|████▍     | 439996/1000000 [3:51:07<3:21:50, 46.24it/s]global step 440000, trans_decision ep_re 2690.4429640305398

{"global_step": 440000, "eval_re": [5544.384335005016, 3680.8709678624236, 
2108.530666725023, 178.21360007405667, 2975.099506013992, 708.1062792214905, 
420.8190819705874, 4338.079978417762, 3205.0826784230408, 3745.2425465920064], 
"eval_len": [1000, 674, 381, 34, 548, 138, 76, 785, 578, 675]}

 45%|████▍     | 449995/1000000 [3:56:17<3:21:52, 45.41it/s]global step 450000, trans_decision ep_re 1696.5591835388436

{"global_step": 450000, "eval_re": [345.09730988369097, 1704.2546365423764, 
1487.0107357668373, 1803.4968760051856, 1089.1681487135704, 840.0170499020332, 
585.4317797368626, 1590.5163195719845, 5497.374747747128, 2023.2242315187689], 
"eval_len": [64, 328, 269, 340, 196, 154, 109, 291, 1000, 363]}

 46%|████▌     | 459995/1000000 [4:01:08<3:17:43, 45.52it/s]global step 460000, trans_decision ep_re 892.4139376691937

{"global_step": 460000, "eval_re": [196.65222702553467, 866.0130763161329, 
2387.068530719359, 1648.485426600771, 182.48544905438158, 146.5102544797874, 
584.299849689768, 349.37869807846124, 972.67251791839, 1590.5733468093515], 
"eval_len": [38, 157, 443, 288, 35, 28, 107, 67, 180, 283]}

 47%|████▋     | 469995/1000000 [4:06:08<3:16:46, 44.89it/s]global step 470000, trans_decision ep_re 985.5764449712908

{"global_step": 470000, "eval_re": [707.6888121751248, 2258.567679665297, 
2592.561563020255, 124.96282491061001, 182.5607698426526, 320.8848835556901, 
615.5228762193013, 204.16799453059974, 161.87640829455796, 2686.9706374988186], 
"eval_len": [127, 418, 465, 24, 35, 59, 112, 39, 31, 485]}

 48%|████▊     | 479995/1000000 [4:11:08<3:09:19, 45.78it/s]global step 480000, trans_decision ep_re 1165.7454347950493

{"global_step": 480000, "eval_re": [5614.622642902488, 345.9451134149198, 
1052.6081117368703, 816.2400960713811, 1179.7160510630456, 1581.6200631185072, 
436.769611383522, 266.66417689106373, 207.826899052371, 155.44158231632562], 
"eval_len": [1000, 62, 186, 160, 207, 285, 80, 51, 40, 30]}

 49%|████▉     | 489996/1000000 [4:16:11<3:04:27, 46.08it/s]global step 490000, trans_decision ep_re 1085.154424063467

{"global_step": 490000, "eval_re": [1996.0714797345179, 611.8285024746218, 
318.23491185904896, 760.2097343389012, 654.9764078802658, 1228.479616587415, 
1142.119930439624, 908.5579853861277, 2604.241282183241, 626.8243897509075], 
"eval_len": [362, 116, 59, 140, 127, 239, 202, 177, 481, 115]}

 50%|████▉     | 499996/1000000 [4:21:14<3:01:00, 46.04it/s]global step 500000, trans_decision ep_re 955.467958449222

{"global_step": 500000, "eval_re": [161.6609738161897, 307.2747520193278, 
284.7258481221303, 163.35362214244478, 504.6735557798045, 2498.5843173003673, 
3383.706954829631, 275.35371590821285, 323.2986978581328, 1652.0471467159791], 
"eval_len": [31, 59, 53, 31, 94, 475, 600, 53, 63, 294]}

 51%|█████     | 509996/1000000 [4:26:27<3:35:47, 37.85it/s]global step 510000, trans_decision ep_re 2551.1561520077425

{"global_step": 510000, "eval_re": [3700.682898695705, 5795.5198305050335, 
4988.935233997619, 902.0856155729467, 551.8394215237053, 843.7416522245929, 
322.2911873932396, 2233.965679144978, 5092.377129177358, 1080.1228718422458], 
"eval_len": [636, 1000, 865, 171, 102, 155, 60, 390, 874, 191]}

 52%|█████▏    | 519995/1000000 [4:31:37<2:56:50, 45.24it/s]global step 520000, trans_decision ep_re 1754.613273465758

{"global_step": 520000, "eval_re": [188.6418314544986, 740.8489350962809, 
670.1292640341575, 2822.036169939413, 1808.9058241122102, 3524.5544110211345, 
1826.930930840281, 2772.8607917966633, 2548.507854560471, 642.7167218024712], 
"eval_len": [36, 134, 124, 500, 322, 628, 333, 482, 464, 120]}

 53%|█████▎    | 529995/1000000 [4:36:29<4:49:37, 27.05it/s]global step 530000, trans_decision ep_re 1480.3129973213922

{"global_step": 530000, "eval_re": [1352.908874539163, 389.77660001317895, 
198.36986291721996, 217.93978056157408, 3080.4296309992747, 3541.6088106506722, 
183.35821981034255, 2578.8300219892276, 2130.2536271816853, 1129.654544551584], 
"eval_len": [236, 73, 38, 42, 521, 602, 35, 448, 374, 196]}

 54%|█████▍    | 539995/1000000 [4:41:47<2:46:50, 45.95it/s]global step 540000, trans_decision ep_re 2286.7203069950983

{"global_step": 540000, "eval_re": [141.32982652970676, 1298.188704931993, 
3877.5630631222193, 1918.9932755410907, 5666.530554325601, 2790.523362585309, 
735.4545649317973, 1062.699677674136, 1847.642262709283, 3528.2777775998475], 
"eval_len": [27, 233, 698, 347, 1000, 504, 133, 197, 329, 620]}

 55%|█████▍    | 549996/1000000 [4:46:37<2:40:53, 46.62it/s]global step 550000, trans_decision ep_re 1837.701398348515

{"global_step": 550000, "eval_re": [507.64173873967945, 2173.4136496668825, 
3159.0314290289243, 570.6603299265081, 2093.073679812759, 4288.667843696426, 
1345.5880309848071, 2815.6031488886524, 203.20311661322393, 1220.1310161272831],
"eval_len": [88, 387, 590, 112, 371, 769, 259, 489, 39, 221]}

 56%|█████▌    | 559997/1000000 [4:51:39<2:39:46, 45.90it/s]global step 560000, trans_decision ep_re 1389.0924147995952

{"global_step": 560000, "eval_re": [2218.703700776791, 1427.7153993670142, 
924.2052613713959, 653.6140341067635, 3470.678728179854, 2872.3142228284255, 
328.85538669597526, 1576.674177140372, 232.0947386434327, 186.0684988859302], 
"eval_len": [373, 275, 159, 115, 573, 471, 62, 270, 44, 36]}

 57%|█████▋    | 569997/1000000 [4:56:38<2:35:57, 45.95it/s]global step 570000, trans_decision ep_re 1319.9544438252328

{"global_step": 570000, "eval_re": [2126.363151079207, 1798.0901278100719, 
135.7565149554512, 3413.022821698899, 742.286119826197, 431.00381460633577, 
285.80589859762506, 878.0489257676902, 1670.9201665510416, 1718.2468973598102], 
"eval_len": [361, 308, 26, 573, 138, 79, 54, 157, 295, 296]}

 58%|█████▊    | 579999/1000000 [5:01:48<2:32:24, 45.93it/s]global step 580000, trans_decision ep_re 1835.0262907137687

{"global_step": 580000, "eval_re": [2875.948688168052, 2520.6172274180954, 
903.1661915852768, 420.6646006082283, 2091.7386599097917, 208.48901483504304, 
3352.4014637132977, 1140.1808189957874, 2726.2420591742343, 2110.814182729882], 
"eval_len": [494, 435, 160, 84, 366, 40, 581, 210, 475, 378]}

 59%|█████▉    | 589998/1000000 [5:06:38<2:27:35, 46.30it/s]global step 590000, trans_decision ep_re 1761.995404652246

{"global_step": 590000, "eval_re": [310.87119437670185, 1085.2214017449317, 
662.8383526613301, 1550.8801956241266, 5530.296658503181, 341.72599364753546, 
370.29808111425234, 1077.8330534885915, 5894.203423468468, 795.7856918933388], 
"eval_len": [58, 191, 131, 272, 956, 63, 67, 188, 1000, 142]}

 60%|█████▉    | 599999/1000000 [5:11:48<2:25:34, 45.80it/s]global step 600000, trans_decision ep_re 1469.7189823176911

{"global_step": 600000, "eval_re": [828.3371245781193, 3731.712083405009, 
157.2068084194488, 1718.148883085236, 178.29429988880477, 351.4202990709504, 
166.40508703784377, 3131.7796421231787, 4277.016953599278, 156.86864196904145], 
"eval_len": [150, 640, 30, 300, 34, 64, 32, 536, 738, 30]}

 61%|██████    | 609995/1000000 [5:16:48<2:21:24, 45.96it/s]global step 610000, trans_decision ep_re 1588.0630536704855

{"global_step": 610000, "eval_re": [5940.584470525205, 351.9291934549646, 
338.7005238326881, 1711.3360741088181, 3209.230873051662, 296.8520275933494, 
182.51738898943313, 182.78204590076572, 1190.6962275622623, 2476.0017116857075],
"eval_len": [1000, 63, 63, 290, 538, 55, 35, 35, 206, 424]}

 62%|██████▏   | 619995/1000000 [5:21:34<2:17:39, 46.01it/s]global step 620000, trans_decision ep_re 685.8976011454852

{"global_step": 620000, "eval_re": [131.04361683426788, 598.4078128811864, 
509.3755516639917, 1709.8471561269675, 2465.7053380297143, 315.87321126882637, 
616.1418820364717, 198.35502852147727, 146.48075708747, 167.7456570044792], 
"eval_len": [25, 106, 96, 285, 406, 59, 108, 38, 28, 32]}

 63%|██████▎   | 629995/1000000 [5:26:28<2:14:24, 45.88it/s]global step 630000, trans_decision ep_re 1546.630896266047

{"global_step": 630000, "eval_re": [322.6596182437258, 2215.012117713641, 
596.3587229116973, 795.3648211347719, 672.8892731772625, 902.038864401856, 
6030.512127345212, 3237.4102680249393, 496.2369519799368, 197.8261977274279], 
"eval_len": [58, 373, 108, 145, 129, 159, 989, 536, 90, 38]}

 64%|██████▍   | 639995/1000000 [5:31:38<2:10:16, 46.06it/s]global step 640000, trans_decision ep_re 1770.0602168014873

{"global_step": 640000, "eval_re": [847.1443222322902, 1863.7199787699633, 
224.39923662949892, 954.7125859808403, 1035.267711676114, 3993.8312978531894, 
141.50245446873828, 2363.6093093211202, 2263.906359113117, 4012.5089119700037], 
"eval_len": [154, 328, 43, 167, 186, 689, 27, 413, 399, 701]}

 65%|██████▍   | 649997/1000000 [5:36:28<2:07:23, 45.79it/s]global step 650000, trans_decision ep_re 938.2008704122284

{"global_step": 650000, "eval_re": [186.6494663712197, 182.88878662753373, 
146.35249028065297, 194.54415147974964, 172.90043270162684, 261.6487641541047, 
3816.414622601794, 1291.9338660886997, 344.2891106333663, 2784.387013183535], 
"eval_len": [36, 35, 28, 37, 33, 51, 675, 226, 64, 502]}

 66%|██████▌   | 659998/1000000 [5:41:24<2:01:42, 46.56it/s]global step 660000, trans_decision ep_re 535.0680130081051

{"global_step": 660000, "eval_re": [293.500025289145, 335.7940368518109, 
341.56225863221846, 632.1246974216327, 672.6069732582456, 1175.8121014764772, 
590.8556008715856, 600.1379980758179, 566.9699794854045, 141.3164587187138], 
"eval_len": [56, 63, 64, 123, 136, 232, 122, 122, 115, 27]}

 67%|██████▋   | 669998/1000000 [5:46:20<1:58:38, 46.36it/s]global step 670000, trans_decision ep_re 1568.946439724455

{"global_step": 670000, "eval_re": [886.72528659643, 217.69416020613772, 
5137.3874901771105, 3699.1736464142396, 710.123927729272, 1245.3724667844933, 
188.76524342640002, 1716.0965127441395, 1742.4669199745695, 145.65874319175893],
"eval_len": [157, 42, 888, 622, 132, 224, 36, 309, 305, 28]}

 68%|██████▊   | 679999/1000000 [5:51:20<1:56:50, 45.65it/s]global step 680000, trans_decision ep_re 537.7254530721327

{"global_step": 680000, "eval_re": [442.7780285227072, 783.2734181817241, 
2923.641731634168, 183.0680525908009, 161.41572520870733, 146.77033358419726, 
271.595483833702, 167.04825063220787, 156.63197365138282, 141.03153288172953], 
"eval_len": [82, 139, 510, 35, 31, 28, 52, 32, 30, 27]}

 69%|██████▉   | 689995/1000000 [5:56:28<1:52:32, 45.91it/s]global step 690000, trans_decision ep_re 1402.4820828131185

{"global_step": 690000, "eval_re": [1973.2438886233992, 1341.345885235938, 
970.7788036631097, 135.8918224938485, 606.3568686407272, 1136.4726505561296, 
308.42090778017047, 191.62168847654178, 6076.354135483105, 1284.3341771782161], 
"eval_len": [333, 237, 171, 26, 111, 216, 58, 37, 1000, 224]}

 70%|██████▉   | 699997/1000000 [6:01:28<1:49:02, 45.85it/s]global step 700000, trans_decision ep_re 2956.5181077454567

{"global_step": 700000, "eval_re": [5625.121894836218, 4549.09567707435, 
3058.477367202019, 5576.887909860796, 5628.628826197743, 4051.2272814667504, 
125.62335963956265, 151.57650983329563, 157.03289997379616, 641.509351370033], 
"eval_len": [1000, 818, 545, 1000, 1000, 716, 24, 29, 30, 117]}

 71%|███████   | 709995/1000000 [6:06:23<1:45:56, 45.62it/s]global step 710000, trans_decision ep_re 1234.082367925059

{"global_step": 710000, "eval_re": [919.0425315411692, 193.75733088602342, 
167.62593062459436, 6072.62576427737, 1948.8512160853875, 484.37493295728433, 
151.30839764338316, 188.13361099616355, 1209.2648249003319, 1005.8391393388829],
"eval_len": [161, 37, 32, 1000, 328, 87, 29, 36, 210, 171]}

 72%|███████▏  | 719997/1000000 [6:11:21<1:41:51, 45.82it/s]global step 720000, trans_decision ep_re 890.9856166951188

{"global_step": 720000, "eval_re": [1479.610384410363, 502.61532259674686, 
193.25344563082817, 201.9091708411099, 1352.6780304995034, 199.52970471595037, 
2993.760945100169, 220.54214112713396, 1570.0235820320888, 195.93343999729413], 
"eval_len": [279, 91, 37, 39, 244, 38, 513, 42, 267, 38]}

 73%|███████▎  | 729997/1000000 [6:16:28<1:38:41, 45.60it/s]global step 730000, trans_decision ep_re 1488.1327905836456

{"global_step": 730000, "eval_re": [926.7235304471518, 192.27829162465204, 
977.0904253935744, 156.65580988027043, 2708.0376940353394, 334.6230224594971, 
2947.8635016536223, 172.01908627431212, 4242.477069046657, 2223.5594750213804], 
"eval_len": [172, 37, 178, 30, 485, 61, 534, 33, 753, 391]}

 74%|███████▍  | 739996/1000000 [6:21:28<1:32:58, 46.61it/s]global step 740000, trans_decision ep_re 783.7975156876473

{"global_step": 740000, "eval_re": [162.4093465088268, 598.2488039271778, 
886.8378275365118, 203.69656031700217, 609.9855052126541, 1083.9696542089753, 
3494.519103185358, 509.9250022898546, 146.67754484744756, 141.70580884266548], 
"eval_len": [31, 107, 159, 39, 111, 193, 646, 96, 28, 27]}

 75%|███████▍  | 749995/1000000 [6:26:28<1:30:52, 45.85it/s]global step 750000, trans_decision ep_re 1724.0564507780134

{"global_step": 750000, "eval_re": [1503.3122400248826, 2972.8468318316395, 
2845.558429240884, 668.5807548130134, 1674.7286585000945, 612.1760263557528, 
2501.2752728907153, 1845.8214091838515, 1606.8885122089941, 1009.3763727303079],
"eval_len": [257, 498, 470, 123, 280, 107, 411, 309, 273, 180]}

 76%|███████▌  | 759997/1000000 [6:31:17<1:26:40, 46.15it/s]global step 760000, trans_decision ep_re 679.9193674066771

{"global_step": 760000, "eval_re": [167.0360767089254, 2888.128512308109, 
216.35100684716434, 558.88521253906, 152.12143447391776, 182.10436720168394, 
157.59192541980136, 1157.5126035348912, 369.4761619629152, 949.9863730703038], 
"eval_len": [32, 466, 41, 100, 29, 35, 30, 207, 66, 167]}

 77%|███████▋  | 769998/1000000 [6:36:11<1:22:20, 46.55it/s]global step 770000, trans_decision ep_re 1028.4650925350174

{"global_step": 770000, "eval_re": [2106.990221020007, 2911.182831575071, 
183.08248750258957, 190.8680265954774, 1006.8518914787637, 2547.128898352017, 
403.0458613421877, 647.4332616577494, 162.62853644953722, 125.43890937677443], 
"eval_len": [364, 507, 35, 37, 182, 445, 74, 119, 31, 24]}

 78%|███████▊  | 779998/1000000 [6:41:18<1:19:09, 46.32it/s]global step 780000, trans_decision ep_re 1066.3502599263709

{"global_step": 780000, "eval_re": [376.75501177877095, 1741.6292237753432, 
785.0314071922778, 2845.3944364670915, 2272.7622858331997, 781.8306756433323, 
716.5473315473487, 851.1518165423257, 145.8627777803074, 146.53763270371346], 
"eval_len": [72, 311, 137, 482, 419, 139, 133, 146, 28, 28]}

 79%|███████▉  | 789995/1000000 [6:46:04<1:16:19, 45.86it/s]global step 790000, trans_decision ep_re 1150.4023349081642

{"global_step": 790000, "eval_re": [5234.433129220729, 172.97752989879731, 
360.79549952033926, 437.88454733146966, 184.36890493103598, 332.32762501754615, 
779.4281201525746, 287.04965599930773, 1869.920802855234, 1844.8375341546093], 
"eval_len": [888, 33, 66, 79, 35, 61, 134, 54, 318, 316]}

 80%|███████▉  | 799996/1000000 [6:50:59<1:11:25, 46.67it/s]global step 800000, trans_decision ep_re 1820.59004608963

{"global_step": 800000, "eval_re": [1343.6697563631317, 2346.9292137699763, 
3730.936883414621, 2699.0594302281656, 2372.4820349304773, 225.86008641624585, 
286.5128550593184, 146.7520116951961, 1696.7330849821699, 3356.965104036997], 
"eval_len": [231, 410, 649, 460, 407, 43, 54, 28, 307, 568]}

 81%|████████  | 809998/1000000 [6:55:59<1:08:07, 46.49it/s]global step 810000, trans_decision ep_re 1628.1284193725799

{"global_step": 810000, "eval_re": [445.1511349077851, 167.09344049946966, 
2823.1339526884703, 2619.1895592543124, 697.918155628544, 343.34303469800767, 
3094.8803133242036, 1473.8810682931617, 1303.0912765111689, 3313.6022579206774],
"eval_len": [81, 32, 483, 446, 126, 64, 527, 256, 223, 580]}

 82%|████████▏ | 819999/1000000 [7:01:08<1:05:27, 45.84it/s]global step 820000, trans_decision ep_re 2417.1512639995117

{"global_step": 820000, "eval_re": [1079.8503705480978, 5949.931040215511, 
1886.584421989459, 1214.6223848078719, 219.04282660113597, 3193.23360450599, 
274.64030234626415, 1643.730856464002, 2922.0869045555996, 5787.789927961189], 
"eval_len": [187, 1000, 324, 213, 42, 546, 52, 290, 513, 1000]}

 83%|████████▎ | 829996/1000000 [7:06:01<1:00:54, 46.52it/s]global step 830000, trans_decision ep_re 1669.6507977638335

{"global_step": 830000, "eval_re": [1061.5611720306226, 5492.9282616961855, 
3543.7766708620447, 363.54055233094095, 441.02345070498905, 752.3388093110364, 
662.6096922369227, 1064.3487241965554, 1586.9951983502353, 1727.3854459188008], 
"eval_len": [190, 968, 629, 69, 80, 141, 115, 190, 287, 304]}

 84%|████████▍ | 839996/1000000 [7:11:00<57:19, 46.52it/s]global step 840000, trans_decision ep_re 825.3201509983977

{"global_step": 840000, "eval_re": [959.025706556917, 172.43205129235162, 
2832.3274340244498, 783.0718059344962, 906.9384713345726, 414.9018878410378, 
1213.5229919066023, 591.6195941957965, 207.00518146898037, 172.35638542877325], 
"eval_len": [172, 33, 493, 144, 153, 74, 220, 107, 40, 33]}

 85%|████████▍ | 849997/1000000 [7:15:54<54:12, 46.13it/s]global step 850000, trans_decision ep_re 945.1409740048155

{"global_step": 850000, "eval_re": [1153.6660619067134, 1651.7141296050272, 
203.4249396351502, 514.2016746573797, 1039.0067210792104, 157.23762515072946, 
2165.953688737318, 155.72593227289693, 192.99299233977106, 2217.4859746639595], 
"eval_len": [203, 285, 39, 94, 179, 30, 375, 30, 37, 381]}

 86%|████████▌ | 859999/1000000 [7:20:59<50:35, 46.13it/s]global step 860000, trans_decision ep_re 1039.1956610534771

{"global_step": 860000, "eval_re": [171.25604476059797, 4099.363270760885, 
2069.028206946834, 478.15121288197275, 168.35064396557596, 301.8320658846594, 
253.50482193479863, 1014.7319305626856, 1483.132475669179, 352.60593716758126], 
"eval_len": [33, 700, 355, 87, 32, 56, 49, 181, 271, 65]}

 87%|████████▋ | 869995/1000000 [7:25:59<47:13, 45.89it/s]global step 870000, trans_decision ep_re 1750.776668771583

{"global_step": 870000, "eval_re": [2154.2695681095247, 1204.3054896679423, 
3131.5613591812357, 187.51903031735114, 645.924275235749, 1497.6070453994105, 
146.57402484755517, 2433.0518045300787, 1103.0465080549857, 5003.907582371997], 
"eval_len": [371, 212, 540, 36, 110, 263, 28, 423, 198, 868]}

 88%|████████▊ | 879996/1000000 [7:30:43<42:33, 47.00it/s]global step 880000, trans_decision ep_re 1112.2735941687897

{"global_step": 880000, "eval_re": [4228.656337933666, 1694.0206878541417, 
1437.837171671325, 183.0075304680591, 797.96633720717, 1624.4018950627299, 
264.67560507236186, 205.08567110715106, 125.62434547105306, 561.4603598402412], 
"eval_len": [706, 312, 245, 35, 139, 286, 50, 39, 24, 101]}

 89%|████████▉ | 889996/1000000 [7:35:42<39:41, 46.19it/s]global step 890000, trans_decision ep_re 1104.620904765391

{"global_step": 890000, "eval_re": [259.2389753372812, 157.2309888927909, 
224.52210339922203, 5306.833043696583, 1541.8533665139844, 226.23381790640892, 
516.9385953833769, 222.75291286574864, 1597.862098225365, 992.7431454331498], 
"eval_len": [50, 30, 43, 901, 268, 45, 98, 43, 274, 170]}

 90%|████████▉ | 899996/1000000 [7:40:59<36:11, 46.04it/s]global step 900000, trans_decision ep_re 1517.940774851721

{"global_step": 900000, "eval_re": [1003.0139496135131, 2054.7809044624364, 
297.80351714000386, 975.7095010992775, 1564.3053426740032, 1683.625518124902, 
882.4116857740745, 239.53119383097814, 5714.921809639465, 763.3043261585558], 
"eval_len": [180, 355, 55, 173, 283, 300, 156, 46, 1000, 133]}

 91%|█████████ | 909995/1000000 [7:45:50<32:52, 45.64it/s]global step 910000, trans_decision ep_re 1367.73532161945

{"global_step": 910000, "eval_re": [2511.4173963371763, 4741.611811032713, 
549.6316459367571, 3193.550096175326, 172.65851719123418, 1795.6640664307024, 
157.38790912838866, 203.82019313167487, 185.1614558278107, 166.45012500271477], 
"eval_len": [432, 803, 99, 540, 33, 306, 30, 39, 35, 32]}

 92%|█████████▏| 919995/1000000 [7:51:09<29:13, 45.62it/s]global step 920000, trans_decision ep_re 1913.9308195112808

{"global_step": 920000, "eval_re": [4015.684899305224, 1473.9536405853908, 
390.80425123523054, 173.14915362774113, 4917.111309903704, 356.0595744480848, 
2233.4212749794547, 2386.947950872799, 2443.9909996952756, 748.1851404599053], 
"eval_len": [685, 252, 71, 33, 825, 65, 380, 403, 424, 133]}

 93%|█████████▎| 929995/1000000 [7:56:00<25:32, 45.67it/s]global step 930000, trans_decision ep_re 1427.2622885687138

{"global_step": 930000, "eval_re": [1121.8411045583848, 2958.598951848123, 
3099.1637685331366, 1927.9676450903266, 1666.8782624182177, 774.3442752605331, 
194.31692221959688, 1908.5423306345904, 484.8305000898978, 136.13912503433113], 
"eval_len": [186, 496, 535, 332, 289, 136, 37, 329, 87, 26]}

 94%|█████████▍| 939995/1000000 [8:01:20<22:14, 44.96it/s]global step 940000, trans_decision ep_re 2512.460104922716

{"global_step": 940000, "eval_re": [177.97449717631767, 815.0780280828999, 
3958.223540770245, 1515.515339807333, 3093.934852570466, 4580.503553478349, 
5910.374909483319, 1761.1286396097792, 2946.117410402715, 365.7502778457324], 
"eval_len": [34, 143, 680, 262, 517, 793, 1000, 308, 497, 67]}

 95%|█████████▍| 949995/1000000 [8:06:30<18:16, 45.60it/s]global step 950000, trans_decision ep_re 1318.997312975683

{"global_step": 950000, "eval_re": [1089.652150850026, 2338.088540519079, 
275.43976880295554, 5949.613431845114, 167.2273778999131, 1481.9226921584993, 
326.8595079633077, 1149.5138250402458, 286.54339508791713, 125.1124395897702], 
"eval_len": [192, 404, 52, 1000, 32, 258, 59, 227, 54, 24]}

 96%|█████████▌| 959995/1000000 [8:11:20<14:37, 45.60it/s]global step 960000, trans_decision ep_re 1516.9329734959717

{"global_step": 960000, "eval_re": [176.7200340345534, 535.4975611688936, 
657.3226777965267, 3825.428967164652, 2605.3752165078117, 1889.4552516451467, 
1117.6926813947423, 2161.345175594039, 618.5314805758782, 1581.9606890774714], 
"eval_len": [34, 100, 116, 651, 466, 327, 203, 381, 112, 276]}

 97%|█████████▋| 969995/1000000 [8:16:24<10:58, 45.58it/s]global step 970000, trans_decision ep_re 1705.416575506959

{"global_step": 970000, "eval_re": [181.0902860423038, 202.75378209500383, 
3424.8333660513076, 3022.4625193451798, 1724.648291319833, 717.7204807546425, 
1055.0485697338997, 204.21776789799466, 1087.5282350949217, 5433.862456734502], 
"eval_len": [35, 39, 563, 495, 289, 129, 179, 39, 184, 890]}

 98%|█████████▊| 979995/1000000 [8:21:40<07:18, 45.61it/s]global step 980000, trans_decision ep_re 1301.8732982076856

{"global_step": 980000, "eval_re": [1919.372293189563, 1395.5274089538557, 
173.17327909335935, 1148.1516764423745, 204.35068613496986, 193.33423455422184, 
1505.041560635438, 2927.485380742894, 1642.3660051472625, 1909.9304571829177], 
"eval_len": [327, 235, 33, 195, 39, 37, 254, 502, 278, 326]}

 99%|█████████▉| 989995/1000000 [8:26:40<04:16, 39.00it/s]global step 990000, trans_decision ep_re 1776.0883608254856

{"global_step": 990000, "eval_re": [5893.22852222457, 1398.4424920592253, 
210.97613040980448, 955.6945704230732, 464.2999912387247, 177.67726720449156, 
1388.977773862961, 4179.299135411914, 2966.618206337823, 125.6695190822701], 
"eval_len": [1000, 244, 40, 170, 84, 34, 240, 713, 502, 24]}

100%|█████████▉| 999995/1000000 [8:31:31<00:00, 45.44it/s]global step 1000000, trans_decision ep_re 1620.7029351749657

{"global_step": 1000000, "eval_re": [4440.333338804908, 242.7277915207722, 
242.11157953957732, 1003.9009200548057, 199.29705766600912, 2496.0685741485827, 
183.21630323415016, 5963.620127722621, 260.49615271803725, 1175.257506340192], 
"eval_len": [732, 45, 46, 177, 38, 420, 35, 1000, 49, 199]}

100%|██████████| 1000000/1000000 [8:31:47<00:00, 32.57it/s]
