
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 4,
    'delayspec': 'ExtremeSparseL4U32::markov(4, 32, [[249, 1], [1, 31]])'
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9997/1000000 [02:05<5:15:38, 52.27it/s]global step 10000, trans_decision ep_re 179.0404543551524

{"global_step": 10000, "eval_re": [321.640920301191, 146.6813726249862, 
26.594177734885946, 32.52817561910951, 228.28071922190844, 59.757176544706205, 
332.6985499395676, 321.0552555269602, 139.64240167322913, 181.52579436497956], 
"eval_len": [134, 84, 30, 39, 104, 67, 155, 134, 69, 91]}

  2%|▏         | 19999/1000000 [06:09<5:13:21, 52.12it/s]global step 20000, trans_decision ep_re 315.4509675340298

{"global_step": 20000, "eval_re": [355.1912451175841, 322.80199357892184, 
225.2419128516411, 353.4091405334581, 382.9947668633196, 203.4833063307537, 
364.5289379939188, 321.67353086522036, 312.20083350375637, 312.98400770172435], 
"eval_len": [174, 138, 109, 171, 220, 106, 183, 142, 133, 132]}

  3%|▎         | 29995/1000000 [10:15<5:15:16, 51.28it/s]global step 30000, trans_decision ep_re 269.73097381696147

{"global_step": 30000, "eval_re": [3.7548518217770273, 351.9608184509429, 
334.2821790436127, 333.22998386169536, 329.3910553834103, 82.50568657176335, 
335.0176065842934, 266.04468634682615, 333.52175453898667, 327.60111556630693], 
"eval_len": [17, 151, 123, 123, 122, 71, 134, 115, 123, 122]}

  4%|▍         | 39999/1000000 [14:21<5:05:18, 52.41it/s]global step 40000, trans_decision ep_re 309.78465631376645

{"global_step": 40000, "eval_re": [375.4606624223329, 297.25557652581125, 
416.96879376937335, 362.63772406791384, 144.13656091720978, 374.88226018072544, 
402.4642907327708, 336.45059764334906, 36.77188335126014, 350.8182135269184], 
"eval_len": [135, 115, 149, 131, 72, 155, 143, 128, 38, 166]}

  5%|▍         | 49995/1000000 [18:26<5:01:38, 52.49it/s]global step 50000, trans_decision ep_re 480.5008852325762

{"global_step": 50000, "eval_re": [476.8968465873092, 240.05782506285385, 
74.4105691117179, 150.12974059353812, 319.6180976209559, 454.09130081294694, 
661.6864172276032, 1216.6104937445295, 224.03793182884766, 987.4696297354596], 
"eval_len": [199, 117, 45, 81, 155, 218, 250, 383, 104, 326]}

  6%|▌         | 59997/1000000 [22:33<4:59:48, 52.26it/s]global step 60000, trans_decision ep_re 622.0098694568375

{"global_step": 60000, "eval_re": [444.1628352129761, 51.328160901880736, 
1194.0177918133693, 730.993375448448, 1230.6207630155445, 411.23765343786965, 
1225.7897454839438, 116.20778394902463, 255.7073580216947, 560.0332272836234], 
"eval_len": [188, 64, 501, 299, 485, 178, 512, 66, 119, 246]}

  7%|▋         | 69999/1000000 [26:43<4:54:33, 52.62it/s]global step 70000, trans_decision ep_re 432.61264220362307

{"global_step": 70000, "eval_re": [86.32961662841771, 47.67306173448533, 
122.8241562181216, 786.1707737625923, 805.1472264574212, 223.85777693387809, 
724.7198522783177, 1034.1895421237316, 320.74372105414994, 174.47069484511485], 
"eval_len": [52, 58, 63, 286, 282, 109, 274, 359, 146, 89]}

  8%|▊         | 79995/1000000 [31:00<4:57:17, 51.58it/s]global step 80000, trans_decision ep_re 712.9778157788787

{"global_step": 80000, "eval_re": [674.9537851974383, 1543.5121327018344, 
40.45968776368264, 2984.799193726771, 45.079485556441384, 53.867563557885674, 
398.74330593328904, 185.17813406002725, 699.0842566346812, 504.1006126567365], 
"eval_len": [249, 504, 58, 956, 46, 53, 174, 98, 250, 189]}

  9%|▉         | 89995/1000000 [35:10<4:47:43, 52.71it/s]global step 90000, trans_decision ep_re 780.1128877338901

{"global_step": 90000, "eval_re": [302.3159139000213, 2116.7709591099415, 
494.4478215098472, 2234.012214898434, 78.69617329551842, 33.323626769138606, 
729.9487566902956, 1542.6632801624455, 211.15904212088256, 57.79108888237588], 
"eval_len": [140, 619, 200, 701, 69, 35, 282, 485, 101, 36]}

 10%|▉         | 99995/1000000 [39:20<4:44:23, 52.74it/s]global step 100000, trans_decision ep_re 1153.1338362796355

{"global_step": 100000, "eval_re": [45.85432747562919, 981.8535424650455, 
1156.2137179678894, 229.8363263086134, 2630.728889169465, 492.5478183304383, 
1338.4796197152366, 1932.8037667808212, 46.64564054215567, 2676.374714041059], 
"eval_len": [50, 359, 378, 110, 753, 189, 426, 632, 52, 776]}

 11%|█         | 109995/1000000 [43:17<4:39:39, 53.04it/s]global step 110000, trans_decision ep_re 738.9821234607974

{"global_step": 110000, "eval_re": [69.46546778814897, 52.834523792725605, 
632.8960385987044, 82.88913594078217, 24.3990458959799, 337.13743069419263, 
272.552755440379, 49.40201775260437, 2644.3945373168685, 3223.8502813875893], 
"eval_len": [79, 68, 265, 91, 29, 154, 123, 54, 837, 1000]}

 12%|█▏        | 119997/1000000 [47:24<4:44:06, 51.62it/s]global step 120000, trans_decision ep_re 715.3152807992724

{"global_step": 120000, "eval_re": [964.8434655574356, 894.765566691886, 
508.69414160070664, 1412.2099472346356, 901.635970326352, 284.5273109486829, 
42.91735876647625, 29.216719998834982, 556.9406050388191, 1557.4017218288946], 
"eval_len": [320, 287, 187, 399, 290, 124, 49, 32, 202, 457]}

 13%|█▎        | 129999/1000000 [51:29<4:28:43, 53.96it/s]global step 130000, trans_decision ep_re 334.6372270121371

{"global_step": 130000, "eval_re": [1068.307326351685, 176.01768150608493, 
41.61050017833868, 32.380191113237586, 26.616338801996154, 239.8798635815484, 
42.76092218830701, 245.2269506386048, 245.1747962871881, 1228.3976994743803], 
"eval_len": [304, 85, 42, 38, 31, 109, 45, 111, 126, 341]}

 14%|█▍        | 139995/1000000 [55:32<4:28:25, 53.40it/s]global step 140000, trans_decision ep_re 626.0878131305255

{"global_step": 140000, "eval_re": [876.7786289482958, 30.763943722837137, 
51.38588937243021, 211.26371914401435, 1354.396509616621, 3.0478280792265524, 
547.340687621588, 449.32173523107565, 1509.4516543566901, 1227.1275352124767], 
"eval_len": [293, 32, 61, 146, 386, 18, 201, 199, 446, 357]}

 15%|█▍        | 149997/1000000 [59:36<4:25:28, 53.36it/s]global step 150000, trans_decision ep_re 827.631684964556

{"global_step": 150000, "eval_re": [1532.9649712501136, 185.84750612022455, 
1219.2489970638383, 774.6493568552127, 43.18476421160577, 284.5144691713891, 
1477.2691973195563, 1231.497088040995, 1345.237687107282, 181.90281250534227], 
"eval_len": [465, 95, 357, 260, 46, 126, 424, 365, 396, 82]}

 16%|█▌        | 159997/1000000 [1:03:42<4:26:52, 52.46it/s]global step 160000, trans_decision ep_re 890.1163834460452

{"global_step": 160000, "eval_re": [165.78626206830342, 36.784254419484505, 
1206.3046579165828, 1158.2980524592167, 1182.8389922852043, 952.0686293785134, 
1300.872867404027, 1263.4832801691339, 1173.8924510475495, 460.8343873124358], 
"eval_len": [83, 45, 356, 342, 347, 300, 375, 368, 346, 176]}

 17%|█▋        | 169999/1000000 [1:07:50<4:19:21, 53.34it/s]global step 170000, trans_decision ep_re 471.5845145960593

{"global_step": 170000, "eval_re": [372.6812306265197, 1238.1741170677105, 
1685.7919529479786, 106.48077943175333, 36.790995896456614, 252.7468336608078, 
38.2383151219757, 501.9057344392707, 268.95040289791854, 214.084783870202], 
"eval_len": [155, 410, 511, 62, 51, 121, 44, 225, 140, 110]}

 18%|█▊        | 179995/1000000 [1:11:53<4:16:30, 53.28it/s]global step 180000, trans_decision ep_re 1360.8652869262446

{"global_step": 180000, "eval_re": [2354.792592749207, 1845.9089523727735, 
720.8456599852, 1725.4733826021404, 1839.9846040888347, 1192.3285843589524, 
172.1890225913519, 1799.7917555689296, 1053.7193292850204, 903.618985660038], 
"eval_len": [672, 544, 249, 536, 509, 371, 83, 527, 336, 304]}

 19%|█▉        | 189997/1000000 [1:16:03<4:19:48, 51.96it/s]global step 190000, trans_decision ep_re 869.7926565542539

{"global_step": 190000, "eval_re": [124.90144088976442, 186.47486648769902, 
1327.873024791003, 610.407204393727, 1138.5774788152785, 1568.7972570512254, 
1512.945004953849, 1479.5646907928685, 706.5773136835463, 41.80828368357589], 
"eval_len": [84, 111, 397, 231, 337, 458, 426, 420, 241, 50]}

 20%|█▉        | 199999/1000000 [1:20:09<4:08:54, 53.57it/s]global step 200000, trans_decision ep_re 585.837115155437

{"global_step": 200000, "eval_re": [154.44823651714378, 54.95513297653318, 
2913.7705016496084, 35.398768321213105, 450.97985735435674, 37.35957914662917, 
35.57504298514702, 116.63699483983714, 189.79108265629154, 1869.45595510761], 
"eval_len": [75, 59, 857, 37, 186, 40, 38, 77, 117, 571]}

 21%|██        | 209999/1000000 [1:24:13<4:08:56, 52.89it/s]global step 210000, trans_decision ep_re 850.9436360481047

{"global_step": 210000, "eval_re": [129.17431816837166, 432.57655737761485, 
47.16127512852471, 1146.1949406630088, 1162.0698164263338, 1521.8584430796927, 
690.8671066688586, 2305.0119714204307, 711.171381580564, 363.3505499676477], 
"eval_len": [93, 183, 53, 360, 364, 492, 284, 681, 325, 185]}

 22%|██▏       | 219995/1000000 [1:28:22<4:05:14, 53.01it/s]global step 220000, trans_decision ep_re 873.8696572570813

{"global_step": 220000, "eval_re": [46.59125840914328, 89.48760862114514, 
2224.7959992832493, 157.26256937137714, 2126.8419481876135, 1558.299318538489, 
194.78909720792052, 35.86894127807382, 2192.2419423222814, 112.51788935151842], 
"eval_len": [38, 52, 618, 76, 594, 425, 97, 42, 632, 58]}

 23%|██▎       | 229997/1000000 [1:32:28<4:04:20, 52.52it/s]global step 230000, trans_decision ep_re 811.292645157541

{"global_step": 230000, "eval_re": [1133.248433403703, 1434.3072171285803, 
1201.8418583918174, 1196.8771576465901, 179.96856601933473, 1602.4223239615806, 
31.564518994789445, 1268.5824703301514, 27.05099682612437, 37.06290887273788], 
"eval_len": [360, 412, 374, 341, 94, 452, 26, 407, 36, 38]}

 24%|██▍       | 239999/1000000 [1:36:33<4:04:22, 51.83it/s]global step 240000, trans_decision ep_re 619.3066269856747

{"global_step": 240000, "eval_re": [30.614019715458184, 186.39904278286912, 
1306.0163093709104, 370.497486119823, 1185.7839542165775, 59.81935198581323, 
1092.210967228893, 47.19740237523295, 1554.6043735197466, 359.9233625414224], 
"eval_len": [35, 106, 427, 150, 360, 58, 391, 47, 474, 155]}

 25%|██▍       | 249999/1000000 [1:40:50<3:55:10, 53.15it/s]global step 250000, trans_decision ep_re 945.6410017710816

{"global_step": 250000, "eval_re": [917.1890379287834, 108.89725946226861, 
2817.871506611786, 1805.2924354215434, 41.16224255311679, 2166.1456958313047, 
44.56388805326491, 30.99966414421344, 270.43753846761274, 1253.8507492369215], 
"eval_len": [343, 64, 826, 577, 39, 673, 26, 38, 134, 400]}

 26%|██▌       | 259995/1000000 [1:44:46<3:52:34, 53.03it/s]global step 260000, trans_decision ep_re 405.1932196523119

{"global_step": 260000, "eval_re": [40.88024444581773, 868.88512648825, 
66.23648825960738, 155.79708745082115, 276.6401216939125, 1643.7125098031872, 
221.70985890744518, 152.80909789083876, 319.7441491731102, 305.5175124101289], 
"eval_len": [50, 300, 89, 74, 138, 443, 100, 75, 197, 128]}

 27%|██▋       | 269997/1000000 [1:48:51<3:48:19, 53.29it/s]global step 270000, trans_decision ep_re 998.0334604264514

{"global_step": 270000, "eval_re": [317.7291518471844, 351.0269500589168, 
34.72083046133604, 3046.9987179949485, 145.3464343356493, 43.36826912973783, 
1497.561384609884, 2078.4123317776653, 735.5171945824878, 1729.6533394667044], 
"eval_len": [146, 143, 41, 866, 72, 50, 458, 608, 241, 473]}

 28%|██▊       | 279999/1000000 [1:53:10<3:45:04, 53.32it/s]global step 280000, trans_decision ep_re 1035.7031008679994

{"global_step": 280000, "eval_re": [1316.707736878426, 1015.3646923505869, 
1646.4773584772292, 545.9707331045822, 777.0048219900907, 1332.5073928093998, 
423.8154439645428, 1783.0101540127, 1282.6167774083503, 233.55589768408674], 
"eval_len": [353, 316, 460, 239, 265, 384, 160, 487, 361, 109]}

 29%|██▉       | 289995/1000000 [1:57:08<3:46:38, 52.21it/s]global step 290000, trans_decision ep_re 293.1737010766884

{"global_step": 290000, "eval_re": [143.23801257030604, 224.72082935464113, 
222.07953750035662, 227.02672350951514, 222.53786937661707, 181.45221393356167, 
225.2145514285471, 58.257630734086646, 1324.468645195673, 102.74099716357958], 
"eval_len": [79, 102, 100, 102, 100, 86, 101, 39, 368, 55]}

 30%|██▉       | 299996/1000000 [2:01:20<3:39:48, 53.08it/s]global step 300000, trans_decision ep_re 775.2578014682924

{"global_step": 300000, "eval_re": [41.76486068718082, 1666.1256173803667, 
2144.388213383579, 242.95312585955145, 217.62711761936654, 208.65858873825772, 
1841.5584432755034, 44.4752062722464, 1297.141185706272, 47.885655760599434], 
"eval_len": [47, 461, 577, 107, 113, 100, 486, 26, 392, 29]}

 31%|███       | 309995/1000000 [2:05:16<3:36:11, 53.19it/s]global step 310000, trans_decision ep_re 757.0778417391216

{"global_step": 310000, "eval_re": [326.3057872731935, 1538.6003417094519, 
92.67361078853554, 423.0468846958715, 438.0873982552437, 1699.3624182772926, 
200.0318294392027, 1031.275064386441, 528.2365474420064, 1293.1585351239776], 
"eval_len": [159, 416, 51, 198, 162, 501, 97, 317, 186, 372]}

 32%|███▏      | 319995/1000000 [2:09:23<3:33:23, 53.11it/s]global step 320000, trans_decision ep_re 814.4706542740493

{"global_step": 320000, "eval_re": [112.41979841173632, 82.85655932520955, 
37.54306168490721, 151.334242157514, 54.53364036173082, 1089.18039000921, 
1357.257350165895, 1863.6058665877695, 1439.912605584972, 1956.0630284515478], 
"eval_len": [59, 64, 39, 92, 59, 375, 421, 506, 444, 544]}

 33%|███▎      | 329997/1000000 [2:13:29<3:29:26, 53.32it/s]global step 330000, trans_decision ep_re 659.9233976254135

{"global_step": 330000, "eval_re": [39.267682844057106, 68.74423487929899, 
27.3814614431932, 1276.3289079684344, 854.9271747451729, 1555.9844250958417, 
778.6632406926184, 306.4809790414205, 1398.6716937393103, 292.7841758047873], 
"eval_len": [47, 73, 32, 343, 262, 446, 270, 132, 383, 124]}

 34%|███▍      | 339999/1000000 [2:17:36<3:25:46, 53.46it/s]global step 340000, trans_decision ep_re 473.8382215770912

{"global_step": 340000, "eval_re": [116.08634338694284, 1176.3393471975069, 
1022.3676875281484, 39.54536006971033, 1146.4344931653993, 203.38194732434727, 
703.3036423245296, 24.30528575410419, 276.4532453608647, 30.164863659358918], 
"eval_len": [73, 346, 303, 40, 333, 99, 263, 28, 160, 32]}

 35%|███▍      | 349995/1000000 [2:21:51<3:24:18, 53.03it/s]global step 350000, trans_decision ep_re 826.0360568512373

{"global_step": 350000, "eval_re": [29.61408962057532, 1025.7876645559197, 
343.5383773219966, 1727.1443987550726, 958.7494717389867, 48.75829198517214, 
1076.1863723876206, 1990.759051088017, 39.00639538181179, 1020.8164556772007], 
"eval_len": [34, 323, 141, 479, 302, 57, 339, 585, 45, 328]}

 36%|███▌      | 359995/1000000 [2:26:01<3:21:28, 52.94it/s]global step 360000, trans_decision ep_re 1129.0501458532708

{"global_step": 360000, "eval_re": [1824.032495156405, 981.5466957016246, 
611.8811639342417, 403.46300888732287, 59.27865779450482, 1561.831830526834, 
121.19961395463982, 2064.273884214567, 319.1758804374596, 3343.818227925108], 
"eval_len": [505, 304, 218, 188, 63, 470, 73, 567, 134, 939]}

 37%|███▋      | 369995/1000000 [2:29:57<3:21:44, 52.05it/s]global step 370000, trans_decision ep_re 947.0729614912007

{"global_step": 370000, "eval_re": [34.56821558488462, 1741.161698929226, 
1955.6084297817836, 1645.5524038169906, 1605.2486209519584, 1787.127842191544, 
363.40184798822617, 279.2271028531446, 24.499870877044977, 34.333581937203405], 
"eval_len": [41, 504, 530, 428, 416, 655, 167, 129, 31, 36]}

 38%|███▊      | 379997/1000000 [2:34:05<3:16:45, 52.52it/s]global step 380000, trans_decision ep_re 723.0580547858581

{"global_step": 380000, "eval_re": [1682.7160488305578, 2294.626156696987, 
37.62027364097169, 37.03537561925173, 1390.3949810011807, 260.2501353591673, 
281.83258464704494, 693.6463585070036, 279.64887515915143, 272.80975839726483], 
"eval_len": [484, 626, 26, 39, 395, 118, 151, 230, 135, 136]}

 39%|███▉      | 389997/1000000 [2:38:11<3:09:34, 53.63it/s]global step 390000, trans_decision ep_re 838.7331060895485

{"global_step": 390000, "eval_re": [51.398140822699816, 113.07274516293076, 
51.42318563753912, 908.8471464606052, 763.853791775676, 1509.6060432772829, 
1620.3729597578758, 892.9623629315931, 1725.9594283291349, 749.8352567401481], 
"eval_len": [31, 67, 31, 309, 268, 409, 423, 285, 468, 245]}

 40%|███▉      | 399999/1000000 [2:42:17<3:08:46, 52.97it/s]global step 400000, trans_decision ep_re 874.18271330539

{"global_step": 400000, "eval_re": [1677.7198410909687, 1282.0480796389497, 
41.621917601889926, 1243.885531306927, 1679.4787105777978, 414.6017879135236, 
658.9573886162541, 539.4530878806867, 84.80640582876023, 1119.2543825981434], 
"eval_len": [445, 384, 42, 357, 455, 155, 238, 191, 50, 341]}

 41%|████      | 409995/1000000 [2:46:23<3:04:38, 53.26it/s]global step 410000, trans_decision ep_re 261.3481152012758

{"global_step": 410000, "eval_re": [35.579958995212195, 641.8100347357173, 
483.9510978260602, 414.49895583826344, 137.7721853171291, 136.9441374648946, 
407.69720339790837, 38.55272968220074, 178.54321877664268, 138.13162997872882], 
"eval_len": [41, 239, 212, 150, 70, 69, 155, 46, 108, 88]}

 42%|████▏     | 419997/1000000 [2:50:25<3:02:33, 52.95it/s]global step 420000, trans_decision ep_re 894.3166449061504

{"global_step": 420000, "eval_re": [480.19272950836336, 687.7201385914448, 
29.159020073193208, 1267.3938531694312, 1363.3438035000024, 961.341353730416, 
1590.5825343507624, 1229.2588602423994, 584.5732202884831, 749.6009356070088], 
"eval_len": [192, 220, 32, 344, 366, 303, 412, 324, 200, 240]}

 43%|████▎     | 429999/1000000 [2:54:31<3:01:23, 52.37it/s]global step 430000, trans_decision ep_re 887.6234474427387

{"global_step": 430000, "eval_re": [1631.591111198404, 368.548907122617, 
1933.991915611722, 188.34427530776657, 311.19631642150824, 813.3470051759707, 
2825.2227208785935, 569.0073413374498, 39.1504509154386, 195.834430457916], 
"eval_len": [455, 141, 534, 87, 124, 318, 773, 194, 43, 96]}

 44%|████▍     | 439995/1000000 [2:58:39<3:04:23, 50.62it/s]global step 440000, trans_decision ep_re 555.549975894828

{"global_step": 440000, "eval_re": [27.799579945722165, 895.0816298142879, 
75.4713522682453, 206.2767732669682, 838.1460026558849, 968.6146065991796, 
1202.390288840984, 846.0634123576497, 34.99363901468376, 460.6624741846732], 
"eval_len": [33, 315, 72, 98, 268, 292, 367, 266, 34, 164]}

 45%|████▍     | 449997/1000000 [3:02:44<2:54:25, 52.55it/s]global step 450000, trans_decision ep_re 532.8809615279014

{"global_step": 450000, "eval_re": [211.10533412510432, 62.260850096159956, 
48.71688648313573, 1568.6665059436637, 1395.2146422452872, 344.7914655980855, 
483.8568872198805, 26.46610539069914, 826.6407342475245, 361.09020392947303], 
"eval_len": [92, 40, 47, 468, 397, 144, 177, 32, 261, 146]}

 46%|████▌     | 459999/1000000 [3:06:49<2:50:31, 52.78it/s]global step 460000, trans_decision ep_re 624.4933389616184

{"global_step": 460000, "eval_re": [66.46341295558007, 208.25524910608675, 
1229.533242721638, 1230.5659141307458, 206.83203626229746, 1298.5424612090176, 
273.51976459154383, 270.1447209796675, 125.7500885991733, 1335.3264990604334], 
"eval_len": [94, 91, 340, 336, 99, 381, 116, 122, 107, 376]}

 47%|████▋     | 469999/1000000 [3:10:54<2:46:49, 52.95it/s]global step 470000, trans_decision ep_re 525.9967542151302

{"global_step": 470000, "eval_re": [100.45438833320523, 782.465900484262, 
119.28424897384845, 53.79390720114807, 1343.045135146152, 669.4810598155323, 
232.05677869833798, 351.3765562674188, 212.6888788130975, 1395.3206884183005], 
"eval_len": [55, 245, 74, 61, 404, 217, 121, 135, 101, 433]}

 48%|████▊     | 479995/1000000 [3:14:59<2:41:49, 53.56it/s]global step 480000, trans_decision ep_re 361.97884476331194

{"global_step": 480000, "eval_re": [1179.053086054302, 37.07891200330539, 
682.1443734318354, 438.2110059314561, 135.75146857891627, 26.717910713477703, 
482.4246307902854, 100.27237001163687, 377.9093839769278, 160.2253061409762], 
"eval_len": [328, 37, 269, 157, 82, 31, 174, 77, 161, 85]}

 49%|████▉     | 489997/1000000 [3:19:04<2:42:43, 52.24it/s]global step 490000, trans_decision ep_re 944.3551405775145

{"global_step": 490000, "eval_re": [869.0752154598806, 650.696782392185, 
1865.0208646189358, 2464.6489601791704, 363.9883703281579, 242.7971659222875, 
151.90704272423594, 1497.2964182101555, 1239.9332539687057, 98.1873319714301], 
"eval_len": [301, 245, 535, 646, 138, 137, 73, 393, 375, 64]}

 50%|████▉     | 499999/1000000 [3:23:21<2:36:38, 53.20it/s]global step 500000, trans_decision ep_re 1030.5915815961912

{"global_step": 500000, "eval_re": [709.3869488581081, 887.4711473959509, 
1209.3058336894146, 1649.271314569995, 1329.4003511248166, 257.5432643747731, 
1310.9232725360246, 979.4967469142206, 37.3388413267341, 1935.778095171876], 
"eval_len": [252, 272, 320, 477, 344, 109, 356, 280, 38, 542]}

 51%|█████     | 509995/1000000 [3:27:18<2:35:45, 52.43it/s]global step 510000, trans_decision ep_re 906.7189950620293

{"global_step": 510000, "eval_re": [738.627270645547, 457.55778950262936, 
156.44922596732124, 48.874979299394255, 691.2129075994238, 674.5542898700135, 
1335.751330014046, 1253.0809979478356, 1428.2939769326642, 2282.787182841418], 
"eval_len": [256, 161, 74, 55, 243, 234, 366, 328, 376, 557]}

 52%|█████▏    | 519997/1000000 [3:31:25<2:31:08, 52.93it/s]global step 520000, trans_decision ep_re 952.2388289259225

{"global_step": 520000, "eval_re": [2321.1672294454866, 1205.1923929650868, 
37.76407979349028, 1977.7530422853167, 57.07011181262294, 519.1074534723722, 
985.9370127984034, 1927.7994417365246, 267.0666445508339, 223.5308803990859], 
"eval_len": [595, 351, 40, 527, 35, 219, 294, 502, 110, 118]}

 53%|█████▎    | 529999/1000000 [3:35:32<2:28:09, 52.87it/s]global step 530000, trans_decision ep_re 816.1688110753144

{"global_step": 530000, "eval_re": [328.9330950892777, 49.41686537749602, 
1370.8995832276505, 1103.0164918766384, 32.50634956327404, 1405.761360801761, 
1715.629807173692, 469.8012446887611, 73.75500625868223, 1611.9683066959103], 
"eval_len": [128, 55, 373, 332, 36, 380, 446, 165, 77, 438]}

 54%|█████▍    | 539995/1000000 [3:39:38<2:24:51, 52.93it/s]global step 540000, trans_decision ep_re 358.9949748849891

{"global_step": 540000, "eval_re": [678.6448516319166, 38.58200402061138, 
2223.93983773207, 300.18150058606534, 110.55196294375125, 85.58870297408197, 
43.95974140677889, 10.299018883813625, 3.861923532338488, 94.34020513846322], 
"eval_len": [223, 50, 583, 116, 71, 51, 35, 15, 17, 58]}

 55%|█████▍    | 549995/1000000 [3:43:51<2:19:47, 53.65it/s]global step 550000, trans_decision ep_re 721.2860974175946

{"global_step": 550000, "eval_re": [23.253082263611752, 205.26170709202302, 
844.2014202623659, 690.5600130169464, 1300.9255985271022, 59.55431559733956, 
954.3728164357999, 26.29319252373955, 973.6686855542229, 2134.7701429027948], 
"eval_len": [27, 105, 260, 208, 381, 62, 272, 35, 298, 545]}

 56%|█████▌    | 559995/1000000 [3:47:46<2:17:58, 53.15it/s]global step 560000, trans_decision ep_re 750.0455245507937

{"global_step": 560000, "eval_re": [1434.714926626733, 489.2303464522618, 
1462.9467930316778, 315.33394104837345, 1673.175823698263, 581.6230295304267, 
40.42897394910552, 191.9235309889312, 1165.928624378383, 145.1492558037829], 
"eval_len": [367, 177, 373, 126, 472, 195, 39, 104, 311, 69]}

 57%|█████▋    | 569997/1000000 [3:52:01<2:16:18, 52.58it/s]global step 570000, trans_decision ep_re 1058.2776807604562

{"global_step": 570000, "eval_re": [476.01413824872725, 821.2378905225465, 
43.82914011745215, 1306.7598479888745, 2232.2298991795624, 2162.6466213331823, 
2219.487612445194, 638.4838735463904, 42.419380690593, 639.668403532037], 
"eval_len": [168, 269, 43, 385, 586, 583, 577, 212, 45, 213]}

 58%|█████▊    | 579995/1000000 [3:56:11<2:12:12, 52.95it/s]global step 580000, trans_decision ep_re 1130.3749190260692

{"global_step": 580000, "eval_re": [2469.814892133946, 2138.867961560703, 
711.9273568191138, 455.01260738725193, 19.172606671509037, 2362.549122642058, 
1811.7838207426487, 906.9542151513497, 384.7156428239866, 42.95096432812689], 
"eval_len": [691, 554, 230, 172, 21, 580, 477, 283, 173, 45]}

 59%|█████▉    | 589995/1000000 [4:00:06<2:10:35, 52.32it/s]global step 590000, trans_decision ep_re 451.98273415220376

{"global_step": 590000, "eval_re": [600.5810366964798, 27.26006887720996, 
63.64525652276973, 31.75216323936661, 30.815350834845805, 37.816576036703, 
896.2000915182682, 1864.8183400527273, 944.852026364517, 22.08643137914956], 
"eval_len": [219, 28, 38, 39, 33, 45, 282, 484, 287, 18]}

 60%|█████▉    | 599997/1000000 [4:04:09<2:05:38, 53.06it/s]global step 600000, trans_decision ep_re 661.4122850833546

{"global_step": 600000, "eval_re": [716.7479353224625, 716.7987774708705, 
89.85358674735572, 611.1984769132424, 736.610374385341, 599.4166492812019, 
71.67042265323711, 600.2288250716077, 895.6504687505372, 1575.9473342376896], 
"eval_len": [234, 232, 58, 203, 250, 228, 43, 199, 296, 398]}

 61%|██████    | 609999/1000000 [4:08:14<2:02:05, 53.24it/s]global step 610000, trans_decision ep_re 750.0905767426218

{"global_step": 610000, "eval_re": [33.96800382223569, 1399.2902768943645, 
489.9751173173806, 666.8235066507535, 554.6125114426254, 1337.887002451228, 
92.27192242200718, 458.91959364998877, 799.7835054598355, 1667.3743273157993], 
"eval_len": [23, 360, 176, 218, 179, 349, 53, 162, 252, 471]}

 62%|██████▏   | 619995/1000000 [4:12:20<2:01:18, 52.21it/s]global step 620000, trans_decision ep_re 674.3914261294092

{"global_step": 620000, "eval_re": [1430.7898847930326, 476.1214789945631, 
42.570190801526216, 89.61591662972566, 1303.1095234089357, 1648.7620415676727, 
898.4198607130514, 68.82498760536672, 750.8957462830299, 34.80463049718833], 
"eval_len": [392, 174, 48, 88, 334, 408, 269, 40, 251, 43]}

 63%|██████▎   | 629997/1000000 [4:16:26<1:55:08, 53.56it/s]global step 630000, trans_decision ep_re 816.5284461206013

{"global_step": 630000, "eval_re": [482.7551949178935, 1084.3026456351897, 
42.632653179217634, 590.0577224216697, 1635.5604151357988, 1915.626502738118, 
1388.165812544387, 36.91911044122335, 967.1479268325243, 22.116477359990647], 
"eval_len": [172, 338, 48, 188, 464, 512, 420, 37, 314, 26]}

 64%|██████▍   | 639999/1000000 [4:20:32<1:52:34, 53.30it/s]global step 640000, trans_decision ep_re 678.8357774480334

{"global_step": 640000, "eval_re": [718.2732690216274, 1670.4720305042651, 
831.9857989934551, 675.0710393981799, 973.9032499585182, 29.08370100524708, 
27.54375636687208, 721.6810070638252, 931.4200182663938, 208.9239039019494], 
"eval_len": [235, 455, 256, 223, 293, 35, 29, 251, 268, 95]}

 65%|██████▍   | 649995/1000000 [4:24:37<1:49:59, 53.03it/s]global step 650000, trans_decision ep_re 650.2251471575707

{"global_step": 650000, "eval_re": [72.05305427014076, 748.3289325074636, 
128.21568353991645, 369.13642423400324, 1265.372286233395, 364.6282213963125, 
993.4592378638602, 1512.9288225785995, 18.06310486389982, 1030.0657040881167], 
"eval_len": [61, 259, 65, 144, 326, 133, 280, 396, 17, 281]}

 66%|██████▌   | 659995/1000000 [4:28:52<1:46:24, 53.26it/s]global step 660000, trans_decision ep_re 1115.9097618265346

{"global_step": 660000, "eval_re": [64.17336307733052, 499.472793096233, 
2107.624286835852, 2259.8338262969355, 91.56214924706926, 2196.913779868082, 
657.1216800725584, 1926.0557065756886, 535.2553020345127, 821.0847311610819], 
"eval_len": [42, 180, 538, 550, 52, 567, 211, 504, 180, 271]}

 67%|██████▋   | 669995/1000000 [4:32:48<1:45:26, 52.17it/s]global step 670000, trans_decision ep_re 1161.5105571199822

{"global_step": 670000, "eval_re": [2056.5858505448414, 476.8550634137719, 
2728.175274959216, 109.70361374237133, 1324.2280009939282, 1644.252307095278, 
133.91716009416922, 794.3481252074932, 546.315124509703, 1800.7250506390496], 
"eval_len": [522, 169, 700, 60, 366, 458, 67, 247, 220, 477]}

 68%|██████▊   | 679997/1000000 [4:36:56<1:40:58, 52.82it/s]global step 680000, trans_decision ep_re 1245.4564437549222

{"global_step": 680000, "eval_re": [1774.5613862863706, 1394.1334490169288, 
1224.0648735123339, 956.8607195748717, 1349.8899041554644, 507.86236096954474, 
1689.3579656368613, 20.573850866925063, 1302.2713169480664, 2234.9886105818555],
"eval_len": [449, 392, 328, 295, 363, 183, 428, 22, 361, 595]}

 69%|██████▉   | 689997/1000000 [4:41:05<1:38:23, 52.51it/s]global step 690000, trans_decision ep_re 734.7329445579608

{"global_step": 690000, "eval_re": [201.14669096117262, 113.35550108884506, 
428.51466776227215, 48.22416628063521, 263.1429508885691, 43.37291174578789, 
605.8330785888888, 1581.664042571004, 2038.9028486781378, 2023.1725870142943], 
"eval_len": [85, 105, 168, 55, 131, 34, 197, 464, 541, 551]}

 70%|██████▉   | 699999/1000000 [4:45:10<1:33:59, 53.19it/s]global step 700000, trans_decision ep_re 768.8576306663235

{"global_step": 700000, "eval_re": [43.51823685468816, 1961.9465850447134, 
486.6627818011866, 1699.3027700993002, 95.74821514771752, 2155.5840902245154, 
687.9603898286801, 342.48886784342795, 162.41530936787817, 52.94906045112756], 
"eval_len": [53, 538, 171, 454, 54, 546, 224, 134, 107, 50]}

 71%|███████   | 709995/1000000 [4:49:15<1:30:59, 53.12it/s]global step 710000, trans_decision ep_re 327.4644655927662

{"global_step": 710000, "eval_re": [274.6127621560849, 834.3259491914215, 
103.18635773175203, 286.5305676791301, 78.97628667373142, 288.2608823453192, 
211.93461269082076, 287.1924114610733, 823.0060585673831, 86.61876743094555], 
"eval_len": [108, 260, 57, 111, 45, 117, 96, 111, 255, 52]}

 72%|███████▏  | 719997/1000000 [4:53:18<1:27:57, 53.06it/s]global step 720000, trans_decision ep_re 888.4301054874386

{"global_step": 720000, "eval_re": [1383.7576563276316, 41.276496983456695, 
2686.215145686023, 50.02211925004145, 2393.1442229328873, 141.97725745281474, 
93.59741072022572, 1617.3567923773733, 430.31932870564435, 46.63462443828809], 
"eval_len": [425, 47, 744, 49, 613, 67, 53, 416, 151, 36]}

 73%|███████▎  | 729997/1000000 [4:57:24<1:24:46, 53.08it/s]global step 730000, trans_decision ep_re 676.1825191622593

{"global_step": 730000, "eval_re": [1200.8758630279565, 37.81897778992783, 
308.62654301023133, 197.3577471548536, 193.54989031863323, 3.939554038855682, 
2857.551583428593, 624.1410375516944, 642.5176997150068, 695.4462955868406], 
"eval_len": [351, 42, 147, 112, 109, 17, 725, 205, 228, 269]}

 74%|███████▍  | 739999/1000000 [5:01:42<1:21:49, 52.96it/s]global step 740000, trans_decision ep_re 1051.9476506682686

{"global_step": 740000, "eval_re": [1739.8312322613806, 1658.7906194296022, 
2347.7065624361476, 1613.9810358158659, 338.242779523247, 1448.582727655276, 
543.315378909075, 30.085518101866814, 463.131830822708, 335.8088217275172], 
"eval_len": [446, 477, 588, 425, 125, 405, 208, 34, 157, 182]}

 75%|███████▍  | 749995/1000000 [5:05:37<1:18:19, 53.20it/s]global step 750000, trans_decision ep_re 981.8416900748832

{"global_step": 750000, "eval_re": [1000.5715088371086, 1586.5186330395545, 
1570.6440119227166, 1561.61004009508, 559.9802842327139, 1628.8736368970272, 
329.6821393690708, 63.53885840728882, 840.6070842406566, 676.3907037076152], 
"eval_len": [304, 410, 386, 415, 184, 424, 122, 58, 263, 212]}

 76%|███████▌  | 759997/1000000 [5:09:44<1:16:42, 52.15it/s]global step 760000, trans_decision ep_re 838.8073221412924

{"global_step": 760000, "eval_re": [131.9792610273642, 786.952938468332, 
577.4497911804689, 537.5746327449988, 45.819587469491054, 476.5129478467376, 
706.7066131265858, 2280.730528926807, 999.4816855452718, 1844.8652350768664], 
"eval_len": [64, 260, 219, 179, 60, 173, 250, 606, 288, 463]}

 77%|███████▋  | 769999/1000000 [5:14:02<1:12:08, 53.14it/s]global step 770000, trans_decision ep_re 903.3717401624908

{"global_step": 770000, "eval_re": [968.8941916621313, 918.386258453817, 
55.38068752411721, 31.395795507125662, 2040.5932937227842, 1949.8448825985643, 
228.41994457573367, 53.18647058002486, 1820.8851431299408, 966.7307338706688], 
"eval_len": [260, 268, 56, 44, 530, 468, 95, 54, 481, 287]}

 78%|███████▊  | 779994/1000000 [5:18:12<1:09:29, 52.76it/s]global step 780000, trans_decision ep_re 1128.5920847095717

{"global_step": 780000, "eval_re": [1243.7951847121678, 1051.934677628708, 
666.6277919523684, 115.544582509793, 539.4730620045476, 1499.7233742480573, 
2191.061723633234, 1723.9063812036638, 1158.5885712709062, 1095.265497932271], 
"eval_len": [388, 308, 214, 72, 203, 436, 576, 453, 338, 312]}

 79%|███████▉  | 789999/1000000 [5:22:09<1:05:39, 53.31it/s]global step 790000, trans_decision ep_re 579.5105814677117

{"global_step": 790000, "eval_re": [2030.4777459463123, 158.6030371090863, 
1243.4609385844756, 179.51828381352013, 480.3917515166617, 60.87126290028254, 
45.02278482643191, 297.24901740629224, 811.4557139586765, 488.0552786153776], 
"eval_len": [506, 74, 360, 84, 168, 78, 52, 114, 250, 186]}

 80%|███████▉  | 799995/1000000 [5:26:13<1:02:55, 52.97it/s]global step 800000, trans_decision ep_re 897.3279126503987

{"global_step": 800000, "eval_re": [654.0001198884186, 753.0635797183506, 
59.09152760325624, 422.18833195286436, 1264.9517684678794, 40.01484306732121, 
2865.316988022321, 16.766739083092325, 2877.8834943278357, 20.001734372645913], 
"eval_len": [220, 242, 63, 148, 383, 51, 788, 21, 734, 20]}

 81%|████████  | 809995/1000000 [5:30:22<1:00:46, 52.10it/s]global step 810000, trans_decision ep_re 653.4640351194907

{"global_step": 810000, "eval_re": [17.359156914983, 1804.1746996055178, 
99.3901382649641, 440.5434889520723, 86.77586490167826, 48.58860917157824, 
2443.1639805855407, 1257.5130824017529, 250.82163066540232, 86.3096997314176], 
"eval_len": [20, 471, 55, 175, 51, 50, 634, 378, 111, 51]}

 82%|████████▏ | 819995/1000000 [5:34:27<56:32, 53.05it/s]global step 820000, trans_decision ep_re 1202.5163668995824

{"global_step": 820000, "eval_re": [1084.7662630161585, 975.5164827127979, 
1620.2533289941211, 1198.3088413342725, 1667.0966779754622, 1186.3665008802725, 
1362.415909181608, 1342.4696074989936, 186.48079687371975, 1401.4892605284188], 
"eval_len": [328, 283, 421, 328, 434, 304, 343, 339, 80, 373]}

 83%|████████▎ | 829995/1000000 [5:38:36<54:03, 52.41it/s]global step 830000, trans_decision ep_re 546.0242392307491

{"global_step": 830000, "eval_re": [864.115695033702, 595.740727711134, 
52.9277757605462, 1228.072843723055, 640.0390154046534, 130.47815227221662, 
1281.951798965018, 176.3349398159754, 452.8554799666776, 37.72596365451327], 
"eval_len": [260, 210, 72, 337, 204, 69, 332, 79, 170, 52]}

 84%|████████▍ | 839995/1000000 [5:42:39<50:20, 52.97it/s]global step 840000, trans_decision ep_re 984.8078853065821

{"global_step": 840000, "eval_re": [41.27813674586997, 679.4474739218488, 
1716.8882556017459, 78.4616225277419, 425.06598949209064, 1014.7287237743317, 
1987.62793305975, 1868.544805505356, 337.28238456087956, 1698.7535278762086], 
"eval_len": [46, 227, 450, 45, 149, 304, 515, 457, 125, 441]}

 85%|████████▍ | 849995/1000000 [5:46:46<47:15, 52.90it/s]global step 850000, trans_decision ep_re 1321.4049060629209

{"global_step": 850000, "eval_re": [2143.2356430308973, 1509.0111062757837, 
2290.163050809295, 2575.869703741161, 9.486024123889251, 1747.6143940246618, 
104.25123197770348, 872.7824717524996, 1628.0930755759273, 333.54235931738856], 
"eval_len": [537, 394, 575, 641, 14, 467, 57, 260, 413, 122]}

 86%|████████▌ | 859997/1000000 [5:50:55<43:38, 53.46it/s]global step 860000, trans_decision ep_re 525.973064798821

{"global_step": 860000, "eval_re": [495.6341862739179, 184.15240199321173, 
828.230336651956, 1854.4974322624164, 99.40295582989542, 53.022176523973286, 
56.904631478501315, 1566.146040289106, 99.01568833741158, 22.724798347819316], 
"eval_len": [210, 99, 253, 471, 61, 51, 64, 423, 67, 25]}

 87%|████████▋ | 869997/1000000 [5:54:59<41:33, 52.13it/s]global step 870000, trans_decision ep_re 775.040104228657

{"global_step": 870000, "eval_re": [203.72937797940622, 95.42411910176106, 
1263.5933301370712, 189.12855011559762, 1831.1545867680704, 1623.6626253158643, 
188.38913219946295, 1653.922852949009, 639.3708160813643, 62.02565163896391], 
"eval_len": [93, 101, 357, 105, 455, 394, 91, 412, 199, 76]}

 88%|████████▊ | 879999/1000000 [5:59:04<37:48, 52.89it/s]global step 880000, trans_decision ep_re 1224.6239220894254

{"global_step": 880000, "eval_re": [1756.7727916031665, 1609.6908647385485, 
1526.004183386982, 1273.8534038196835, 1754.6207436435323, 1885.487852549872, 
992.4581924061471, 244.07409570617398, 454.675539912472, 748.6015531276764], 
"eval_len": [432, 401, 381, 355, 451, 491, 300, 103, 156, 236]}

 89%|████████▉ | 889995/1000000 [6:03:11<34:34, 53.03it/s]global step 890000, trans_decision ep_re 683.9989413758651

{"global_step": 890000, "eval_re": [84.66562285053821, 2656.6892928211632, 
292.06689674486444, 373.3543156261206, 479.36849182631823, 208.74988466745452, 
1341.6059163586724, 910.425737456034, 442.23908946923154, 50.8241659382542], 
"eval_len": [95, 737, 133, 135, 168, 102, 397, 290, 171, 42]}

 90%|████████▉ | 899997/1000000 [6:07:16<31:13, 53.38it/s]global step 900000, trans_decision ep_re 569.644097449968

{"global_step": 900000, "eval_re": [789.459790651491, 463.29000608964844, 
28.9579832501698, 79.59422118389071, 2121.4040653441944, 136.29383431359008, 
1885.9545549165389, 111.11961080577824, 46.06733645452164, 34.29957148985622], 
"eval_len": [245, 160, 36, 49, 551, 72, 496, 59, 60, 36]}

 91%|█████████ | 909999/1000000 [6:11:33<28:06, 53.35it/s]global step 910000, trans_decision ep_re 1058.3786433577545

{"global_step": 910000, "eval_re": [2204.628873731164, 1849.2991946335717, 
834.9722152676221, 46.8866918677925, 2744.144297896649, 311.2354345236994, 
24.785712350478644, 570.8956458489475, 50.15028973388759, 1946.788077723733], 
"eval_len": [547, 458, 252, 48, 715, 115, 31, 179, 49, 499]}

 92%|█████████▏| 919995/1000000 [6:15:27<25:33, 52.19it/s]global step 920000, trans_decision ep_re 978.3836801128679

{"global_step": 920000, "eval_re": [34.021029680870164, 2174.741115396922, 
1964.4828991134816, 28.75120658078382, 1649.879995783812, 39.97014916195502, 
839.8036923584375, 858.1532983263544, 2152.6025731114096, 41.43084161465155], 
"eval_len": [36, 552, 524, 32, 467, 50, 230, 239, 516, 46]}

 93%|█████████▎| 929997/1000000 [6:19:33<21:57, 53.11it/s]global step 930000, trans_decision ep_re 304.8727848686168

{"global_step": 930000, "eval_re": [139.5728888462014, 168.9443202555375, 
128.56614659159763, 44.4692307925357, 126.27201322925893, 1725.8234553016214, 
203.45513613855633, 24.36833323141133, 350.1037069192453, 137.15261738020232], 
"eval_len": [72, 84, 114, 26, 70, 472, 104, 33, 132, 70]}

 94%|█████████▍| 939999/1000000 [6:23:35<18:58, 52.69it/s]global step 940000, trans_decision ep_re 542.0676555900299

{"global_step": 940000, "eval_re": [1167.3048756393207, 1221.266433986995, 
1646.954861084462, 74.81534585135257, 17.36085740938094, 1102.4500781868626, 
35.724454929014556, 34.50391307518249, 42.3268342827285, 77.96890145499955], 
"eval_len": [287, 306, 455, 76, 20, 276, 32, 42, 47, 80]}

 95%|█████████▍| 949995/1000000 [6:27:38<15:49, 52.66it/s]global step 950000, trans_decision ep_re 259.06170110848893

{"global_step": 950000, "eval_re": [328.4565726453093, 289.29912309352875, 
292.8033165829431, 299.5088709972375, 288.34509848013914, 157.42057083809786, 
67.57095067759202, 292.0847120210067, 290.6148326221772, 284.5129631268579], 
"eval_len": [146, 110, 111, 113, 110, 73, 64, 111, 110, 109]}

 96%|█████████▌| 959996/1000000 [6:31:40<12:30, 53.32it/s]global step 960000, trans_decision ep_re 728.1667397937703

{"global_step": 960000, "eval_re": [1747.3243400065135, 622.2867412462671, 
44.390376908138556, 1534.1044526076614, 364.8400361159685, 1670.5407848258822, 
222.64648738999446, 9.670335274413008, 33.01503509373687, 1032.848808469127], 
"eval_len": [414, 194, 48, 395, 132, 436, 92, 14, 38, 267]}

 97%|█████████▋| 969997/1000000 [6:35:45<09:24, 53.15it/s]global step 970000, trans_decision ep_re 750.7787869907248

{"global_step": 970000, "eval_re": [1166.3343573476398, 470.88760561565454, 
476.4784339968715, 29.194104107202605, 804.0614970299517, 279.13076991801216, 
1899.957166505122, 43.13456648681293, 880.143925225347, 1458.4654436746341], 
"eval_len": [290, 156, 174, 32, 257, 108, 491, 52, 224, 354]}

 98%|█████████▊| 979999/1000000 [6:39:50<06:15, 53.30it/s]global step 980000, trans_decision ep_re 564.7991466349001

{"global_step": 980000, "eval_re": [304.2233698374705, 868.9509130567545, 
396.83835727309645, 883.5720207081064, 873.6401062796673, 64.64752051243299, 
369.72279945643055, 914.1473981931338, 99.1117740921836, 873.1372069397248], 
"eval_len": [114, 248, 143, 240, 236, 38, 149, 242, 62, 235]}

 99%|█████████▉| 989995/1000000 [6:43:53<03:08, 53.05it/s]global step 990000, trans_decision ep_re 640.9880613114057

{"global_step": 990000, "eval_re": [348.86193782699945, 36.17405092143565, 
394.60577885448225, 859.0154794291325, 820.6272590058198, 269.30916488569926, 
60.23907454759921, 517.8098239900409, 1623.0050500411016, 1480.2329936117458], 
"eval_len": [125, 38, 139, 243, 230, 113, 73, 167, 393, 359]}

100%|█████████▉| 999997/1000000 [6:47:56<00:00, 53.10it/s]global step 1000000, trans_decision ep_re 794.7411461449809

{"global_step": 1000000, "eval_re": [2455.0497951423117, 75.26241111779487, 
48.952215551623944, 1410.1074931844992, 1532.2052865673857, 156.88792517185985, 
1814.8543708350007, 252.72505149410304, 175.95881309661723, 25.40809928861284], 
"eval_len": [636, 76, 47, 344, 372, 71, 436, 101, 90, 29]}

100%|██████████| 1000000/1000000 [6:48:07<00:00, 40.84it/s]
