
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'ExtremeClogL1U23::markov(ord(15,1), ord(3,5,3,shift=22), 
[[124, 1], [1, 19]])'
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9995/1000000 [03:01<6:39:02, 41.35it/s]global step 10000, trans_decision ep_re 121.46143667242953

{"global_step": 10000, "eval_re": [95.3185666239368, 142.75148377602855, 
157.09782091422736, 101.22145560749702, 162.69914452896182, 97.97673100960905, 
88.81930377340238, 82.2072852611555, 104.71672536891992, 181.80584986055675], 
"eval_len": [72, 94, 96, 74, 99, 69, 75, 66, 76, 111]}

  2%|▏         | 19995/1000000 [08:46<6:31:55, 41.67it/s]global step 20000, trans_decision ep_re 126.71126072085595

{"global_step": 20000, "eval_re": [244.41670630708947, 76.49460830549488, 
88.3694435682435, 171.91435067799404, 104.30061400116305, 125.3285551524374, 
110.52849394321403, 82.23283078919752, 22.536142549643316, 240.99086191408222], 
"eval_len": [126, 56, 64, 107, 72, 90, 81, 63, 27, 125]}

  3%|▎         | 29997/1000000 [14:28<8:02:26, 33.51it/s]global step 30000, trans_decision ep_re 114.91022627106267

{"global_step": 30000, "eval_re": [25.282760857437754, 118.81060981347497, 
100.31497527724278, 162.15527846434279, 106.99517063756227, 127.32377712953503, 
117.65514756145997, 121.20710289554252, 126.65387558876023, 142.7035644852684], 
"eval_len": [32, 102, 74, 115, 82, 95, 96, 93, 101, 108]}

  4%|▍         | 39998/1000000 [20:05<8:02:54, 33.13it/s]global step 40000, trans_decision ep_re 107.1284406645477

{"global_step": 40000, "eval_re": [91.52016058987799, 114.00131928975476, 
75.06766582595651, 159.37455623044784, 182.2918293928829, 107.12802070667838, 
127.45658991247019, 87.55177749166668, 26.106006801698488, 100.78648040404322], 
"eval_len": [74, 77, 65, 99, 113, 81, 96, 71, 28, 75]}

  5%|▍         | 49999/1000000 [25:51<7:46:12, 33.96it/s]global step 50000, trans_decision ep_re 57.436558697207616

{"global_step": 50000, "eval_re": [25.597926919925076, 70.0825219474797, 
65.73426481619009, 69.07214397389095, 68.3728153164291, 76.65090135616327, 
25.025135161056113, 81.95217944783757, 46.63263329386338, 45.24506473924093], 
"eval_len": [29, 57, 56, 57, 52, 59, 28, 64, 43, 45]}

  6%|▌         | 59997/1000000 [31:26<6:20:10, 41.21it/s]global step 60000, trans_decision ep_re 210.43359665383633

{"global_step": 60000, "eval_re": [27.464156758689466, 124.66286906720792, 
155.1699424848394, 228.82708625581265, 252.9616206994156, 293.0484097237513, 
325.5401613775345, 268.2713099420219, 268.2987221387601, 160.09168809033036], 
"eval_len": [29, 87, 91, 113, 125, 137, 154, 127, 133, 97]}

  7%|▋         | 69997/1000000 [36:37<6:11:10, 41.76it/s]global step 70000, trans_decision ep_re 54.57475775050993

{"global_step": 70000, "eval_re": [48.46574226911924, 33.94373575458603, 
98.11165356384811, 40.24564517105269, 40.58578082481099, 92.83679630299697, 
24.49709614260572, 32.70819004565814, 34.28672812299341, 100.06620930742802], 
"eval_len": [45, 39, 64, 41, 42, 70, 29, 42, 39, 70]}

  8%|▊         | 79997/1000000 [41:46<6:05:14, 41.98it/s]global step 80000, trans_decision ep_re 205.41848405126402

{"global_step": 80000, "eval_re": [118.75766922143914, 21.803303367531974, 
146.15363943933886, 138.39933607530153, 108.03894115495437, 690.0392735029533, 
119.02072209732388, 25.681743145215783, 660.2742535819832, 26.015958926597957], 
"eval_len": [81, 25, 88, 81, 75, 304, 82, 27, 273, 30]}

  9%|▉         | 89995/1000000 [46:56<6:04:10, 41.65it/s]global step 90000, trans_decision ep_re 204.59273716545886

{"global_step": 90000, "eval_re": [45.13078314923384, 312.40679104700655, 
411.55319984956884, 186.13329301713773, 117.85087622402054, 207.182411534849, 
83.10441455333964, 291.58468761932903, 128.6615891374498, 262.31932552265386], 
"eval_len": [43, 143, 195, 96, 80, 111, 64, 132, 79, 123]}

 10%|▉         | 99995/1000000 [52:07<5:58:54, 41.79it/s]global step 100000, trans_decision ep_re 131.65183001047848

{"global_step": 100000, "eval_re": [108.35235784612922, 101.09447151544065, 
98.62061205303624, 220.53457408218833, 197.01935591066393, 21.291241617794043, 
191.10327782769744, 187.92375972734175, 103.62816223275425, 86.95048729173887], 
"eval_len": [78, 76, 76, 123, 108, 24, 104, 107, 77, 68]}

 11%|█         | 109995/1000000 [57:16<5:54:22, 41.86it/s]global step 110000, trans_decision ep_re 235.07630314467414

{"global_step": 110000, "eval_re": [343.1219328731769, 59.540654340656886, 
264.04915898031953, 63.11707705056751, 370.6493023264343, 49.971274688507236, 
386.3986255498936, 181.03359053092464, 458.0970035859286, 174.78441152033258], 
"eval_len": [171, 57, 134, 59, 163, 51, 166, 105, 200, 97]}

 12%|█▏        | 119997/1000000 [1:02:40<5:51:04, 41.78it/s]global step 120000, trans_decision ep_re 303.6220733835022

{"global_step": 120000, "eval_re": [630.4885524201738, 463.4260550497807, 
85.40525393266132, 291.16127413158387, 416.47152935023286, 418.87291121898704, 
191.47783767942332, 190.76870886347456, 149.3721075869001, 198.77650360180428], 
"eval_len": [271, 209, 65, 131, 199, 196, 111, 108, 86, 121]}

 13%|█▎        | 129996/1000000 [1:07:41<5:43:34, 42.20it/s]global step 130000, trans_decision ep_re 151.99346929310664

{"global_step": 130000, "eval_re": [131.65564394341246, 325.3411052305498, 
41.55830561792811, 163.6642892035688, 38.504590044813796, 212.44795086022643, 
258.21061614995756, 46.219535484722634, 261.8180981478651, 40.51455824802159], 
"eval_len": [82, 149, 43, 90, 43, 110, 125, 44, 125, 42]}

 14%|█▍        | 139996/1000000 [1:12:53<5:39:10, 42.26it/s]global step 140000, trans_decision ep_re 260.2108290428854

{"global_step": 140000, "eval_re": [389.2693155683637, 168.80022304991797, 
293.0327951381754, 142.48911732024024, 192.62573160641927, 469.68931590627375, 
459.3050949049497, 41.31501014964739, 241.57982780456786, 204.00185898029844], 
"eval_len": [187, 103, 156, 88, 124, 213, 194, 42, 136, 106]}

 15%|█▍        | 149999/1000000 [1:18:07<5:40:23, 41.62it/s]global step 150000, trans_decision ep_re 153.0007365048005

{"global_step": 150000, "eval_re": [78.32968807340146, 415.92884358563583, 
61.46320497078266, 230.32710390130325, 64.25881128655341, 64.24730299710521, 
260.1935699387926, 22.756916903494183, 56.932193947031024, 275.5697294439053], 
"eval_len": [64, 193, 58, 121, 58, 54, 144, 25, 52, 153]}

 16%|█▌        | 159999/1000000 [1:23:19<5:34:34, 41.84it/s]global step 160000, trans_decision ep_re 169.97550210269117

{"global_step": 160000, "eval_re": [158.37298207951756, 470.8469325203197, 
216.50131762141095, 42.876019278261346, 190.93428422259444, 38.13932892638806, 
85.22681555164516, 33.29874731791758, 429.5881257405031, 33.970467768353714], 
"eval_len": [91, 194, 121, 41, 116, 37, 71, 35, 183, 36]}

 17%|█▋        | 169996/1000000 [1:28:31<5:26:47, 42.33it/s]global step 170000, trans_decision ep_re 490.23138169019137

{"global_step": 170000, "eval_re": [69.96848602012692, 2125.52963503052, 
168.66777012780733, 77.87637189548711, 71.91593583186052, 1501.5227405404817, 
48.48671754096343, 66.2966793257745, 714.4911500103887, 57.55833057850425], 
"eval_len": [58, 690, 94, 64, 62, 544, 56, 57, 276, 60]}

 18%|█▊        | 179996/1000000 [1:33:45<5:22:10, 42.42it/s]global step 180000, trans_decision ep_re 231.69166804748338

{"global_step": 180000, "eval_re": [41.97612701735121, 170.1008886957638, 
228.94488655277036, 39.22825895378193, 479.32948011710477, 769.934757498209, 
245.03923892833038, 37.36479800741051, 270.03619544428966, 34.96204925982254], 
"eval_len": [43, 100, 122, 43, 196, 312, 127, 42, 134, 41]}

 19%|█▉        | 189996/1000000 [1:38:56<5:19:54, 42.20it/s]global step 190000, trans_decision ep_re 244.2815716804972

{"global_step": 190000, "eval_re": [191.86061834655774, 190.27663663351666, 
434.9442486732036, 59.9555129971154, 428.2301701174216, 81.14123651438632, 
522.1708947292146, 174.20693250698875, 182.52477749853495, 177.5046887880321], 
"eval_len": [98, 103, 176, 60, 178, 67, 196, 97, 96, 103]}

 20%|█▉        | 199997/1000000 [1:44:08<5:19:01, 41.79it/s]global step 200000, trans_decision ep_re 252.75089925703008

{"global_step": 200000, "eval_re": [62.561217298178086, 89.97630304140968, 
451.5583160349848, 83.08227418237385, 452.97038504535954, 59.72601371787039, 
200.3151144888002, 299.9006972500782, 768.1575192758141, 59.26115223543228], 
"eval_len": [55, 69, 179, 76, 172, 55, 103, 141, 266, 54]}

 21%|██        | 209996/1000000 [1:49:30<5:10:28, 42.41it/s]global step 210000, trans_decision ep_re 562.9936666063267

{"global_step": 210000, "eval_re": [397.2958062583759, 836.8423166024273, 
77.28557881804876, 63.08725227121713, 1512.4047127498015, 454.5267654914247, 
1066.7091929546782, 79.11787198958022, 1077.6835810078212, 64.98358791989195], 
"eval_len": [174, 295, 67, 64, 498, 181, 372, 63, 358, 57]}

 22%|██▏       | 219998/1000000 [1:54:41<5:25:23, 39.95it/s]global step 220000, trans_decision ep_re 218.30967596447385

{"global_step": 220000, "eval_re": [58.665017983799615, 43.55290740918912, 
159.06282029085483, 48.03545427660448, 469.62091658345696, 35.80468922951519, 
36.13315264379186, 456.12841823276506, 549.2753765092829, 326.8180064854785], 
"eval_len": [53, 43, 90, 46, 181, 40, 38, 181, 216, 146]}

 23%|██▎       | 229996/1000000 [2:00:17<5:02:06, 42.48it/s]global step 230000, trans_decision ep_re 348.5484278967646

{"global_step": 230000, "eval_re": [88.41123950700444, 184.7189491494241, 
571.6592154768024, 172.9509746112791, 440.6052088002728, 460.2681643131536, 
801.5709243157452, 534.8688305697028, 38.485883146993636, 191.9448890772677], 
"eval_len": [73, 98, 212, 99, 175, 176, 270, 207, 39, 102]}

 24%|██▍       | 239997/1000000 [2:05:58<6:32:43, 32.25it/s]global step 240000, trans_decision ep_re 129.19209201531947

{"global_step": 240000, "eval_re": [38.47640481306918, 41.900822529998955, 
21.972754789096797, 202.17465283317398, 60.89866014499267, 578.1413379872694, 
26.849411485494038, 56.31660382767351, 241.875780745281, 23.31449099714542], 
"eval_len": [42, 42, 29, 102, 56, 215, 30, 58, 118, 27]}

 25%|██▍       | 249996/1000000 [2:11:40<4:56:24, 42.17it/s]global step 250000, trans_decision ep_re 331.50352279197307

{"global_step": 250000, "eval_re": [742.7112156513282, 229.92625651572232, 
545.0238635728499, 21.665972318757223, 838.844210749936, 29.712811856483754, 
258.31568000224047, 192.8593172796359, 86.6600366313508, 369.3158633414253], 
"eval_len": [240, 115, 208, 26, 281, 39, 125, 103, 71, 159]}

 26%|██▌       | 259996/1000000 [2:17:04<4:50:21, 42.48it/s]global step 260000, trans_decision ep_re 313.34443280133536

{"global_step": 260000, "eval_re": [153.21628131528033, 170.42314490711206, 
174.36907118899032, 702.7094174616055, 177.52356120955596, 409.8067822227008, 
175.66443190882015, 603.2549342257593, 462.38603051501246, 104.0906730585169], 
"eval_len": [85, 94, 94, 253, 99, 178, 97, 227, 179, 82]}

 27%|██▋       | 269997/1000000 [2:22:24<5:11:04, 39.11it/s]global step 270000, trans_decision ep_re 65.08691105052682

{"global_step": 270000, "eval_re": [40.715290992337565, 59.58226717569717, 
177.93494865023655, 64.30216880586026, 52.160474616769164, 41.93911727059048, 
70.95473410826072, 38.28489630458288, 46.24167060022323, 58.75354198071032], 
"eval_len": [43, 52, 100, 56, 51, 43, 62, 41, 43, 56]}

 28%|██▊       | 279998/1000000 [2:27:47<4:53:16, 40.92it/s]global step 280000, trans_decision ep_re 390.2835116540349

{"global_step": 280000, "eval_re": [862.2066591628084, 216.38554018880248, 
480.8442112068949, 27.482786621757523, 467.94271067132917, 61.16997108874035, 
752.0068176102084, 254.82887945841904, 275.36439996010546, 504.6031405712839], 
"eval_len": [303, 107, 182, 29, 185, 59, 256, 118, 129, 194]}

 29%|██▉       | 289997/1000000 [2:33:06<4:42:37, 41.87it/s]global step 290000, trans_decision ep_re 138.7952982088696

{"global_step": 290000, "eval_re": [401.94648992991915, 83.35187176923661, 
34.77137305417652, 263.1041312456028, 55.60117604458481, 264.2614000037072, 
65.74920444928887, 76.2016035072589, 60.633001250708695, 82.33273083421271], 
"eval_len": [161, 72, 40, 125, 49, 120, 56, 63, 60, 73]}

 30%|██▉       | 299997/1000000 [2:38:19<4:39:51, 41.69it/s]global step 300000, trans_decision ep_re 304.6825504771562

{"global_step": 300000, "eval_re": [180.53955039430838, 68.59168305514275, 
475.3185445614697, 62.74948903225446, 99.5437571056805, 64.81899909556917, 
181.4442604152927, 183.5308086464061, 58.17591897987961, 1672.1124934855588], 
"eval_len": [99, 59, 183, 62, 75, 62, 92, 96, 57, 527]}

 31%|███       | 309997/1000000 [2:43:31<4:34:17, 41.93it/s]global step 310000, trans_decision ep_re 80.85097656126739

{"global_step": 310000, "eval_re": [59.55423862831317, 200.08128133780824, 
51.37734737995268, 31.851411938454135, 57.08541325859582, 236.1562816087928, 
49.10027697987311, 41.80307017825365, 34.2147850661026, 47.28565923652757], 
"eval_len": [54, 105, 46, 37, 50, 116, 45, 43, 35, 49]}

 32%|███▏      | 319998/1000000 [2:48:56<4:28:13, 42.25it/s]global step 320000, trans_decision ep_re 166.94413651327014

{"global_step": 320000, "eval_re": [39.5168368047375, 62.53101562044002, 
24.557118684442493, 37.41211009522272, 58.160483171026236, 85.19036494832588, 
40.075243570032065, 693.4155055573715, 198.64752758461665, 429.93515909648625], 
"eval_len": [41, 57, 27, 35, 56, 69, 39, 247, 105, 166]}

 33%|███▎      | 329995/1000000 [2:54:12<4:27:30, 41.74it/s]global step 330000, trans_decision ep_re 97.93239604031717

{"global_step": 330000, "eval_re": [72.32713800453604, 78.19906193751571, 
84.61303310735211, 201.01806557589686, 50.843613722993354, 224.3222872393198, 
46.63721409015176, 44.43559788355874, 60.204904858925126, 116.7230439829222], 
"eval_len": [60, 67, 71, 100, 46, 119, 46, 44, 54, 82]}

 34%|███▍      | 339999/1000000 [2:59:59<4:20:56, 42.16it/s]global step 340000, trans_decision ep_re 161.30494539404245

{"global_step": 340000, "eval_re": [179.9202145173153, 237.05123050285388, 
59.086607758630514, 39.71896946940613, 36.824313418158034, 251.62820013508002, 
265.8662768930754, 42.3519118117404, 467.73370694234654, 32.86802249181805], 
"eval_len": [109, 130, 51, 41, 38, 129, 136, 42, 179, 40]}

 35%|███▍      | 349997/1000000 [3:05:25<4:21:19, 41.46it/s]global step 350000, trans_decision ep_re 166.26699720085662

{"global_step": 350000, "eval_re": [57.30713349421677, 38.18123003126465, 
82.01306943375003, 81.59294306108085, 41.325771421800575, 66.19973990419662, 
378.7418255864169, 68.8414385545886, 72.99740677722757, 775.4694137440237], 
"eval_len": [55, 40, 68, 66, 41, 59, 159, 60, 58, 266]}

 36%|███▌      | 359998/1000000 [3:11:01<4:21:14, 40.83it/s]global step 360000, trans_decision ep_re 45.68778326198576

{"global_step": 360000, "eval_re": [61.874999527755634, 33.39902741675205, 
39.367225268181095, 44.69988756269598, 75.34175499812852, 43.844384791016054, 
57.90221996858212, 29.265963958117023, 29.136604297670985, 42.04576483095818], 
"eval_len": [55, 39, 42, 44, 68, 43, 50, 36, 37, 42]}

 37%|███▋      | 369997/1000000 [3:16:22<4:14:25, 41.27it/s]global step 370000, trans_decision ep_re 76.17420254355156

{"global_step": 370000, "eval_re": [60.672452346269246, 163.91352479276014, 
35.437164117127494, 180.45284499789068, 86.3669370384157, 38.75976458215247, 
40.11593045364974, 72.83048823389433, 30.484050195851452, 52.708868677504306], 
"eval_len": [58, 94, 38, 104, 70, 42, 41, 65, 34, 49]}

 38%|███▊      | 379999/1000000 [3:22:11<5:10:46, 33.25it/s]global step 380000, trans_decision ep_re 254.0003909681668

{"global_step": 380000, "eval_re": [41.74904103418574, 198.42858614348654, 
198.54452912311507, 197.1655727544875, 189.90067935701785, 704.664673342755, 
33.71385050742516, 465.8253122062422, 476.7921612637201, 33.219503949233086], 
"eval_len": [42, 109, 101, 104, 96, 245, 39, 176, 176, 39]}

 39%|███▉      | 389999/1000000 [3:27:45<4:12:40, 40.24it/s]global step 390000, trans_decision ep_re 116.7343234157499

{"global_step": 390000, "eval_re": [477.5808677290715, 41.634111253233996, 
46.16564128708723, 217.1975155005293, 32.70117069920428, 58.01929149264291, 
43.95313449516101, 45.68642509429371, 48.40143277483353, 156.00364383144145], 
"eval_len": [176, 46, 44, 111, 38, 51, 42, 46, 43, 92]}

 40%|███▉      | 399997/1000000 [3:33:08<3:57:29, 42.11it/s]global step 400000, trans_decision ep_re 195.68421252180173

{"global_step": 400000, "eval_re": [990.5187848541674, 39.20112795078377, 
30.57239138447323, 205.80912070833705, 201.7679019298016, 33.461380700125595, 
213.5561041313897, 155.95096731229233, 39.20382926177668, 46.80051698486963], 
"eval_len": [341, 38, 35, 110, 103, 39, 107, 88, 41, 44]}

 41%|████      | 409995/1000000 [3:38:17<3:52:44, 42.25it/s]global step 410000, trans_decision ep_re 419.8941507638888

{"global_step": 410000, "eval_re": [32.61260907038364, 159.20171545703866, 
31.186042609935125, 448.0848707151516, 146.6445695592036, 922.2273438942705, 
875.1490075227553, 135.987413573831, 663.7272944131068, 784.1206408232114], 
"eval_len": [37, 93, 38, 169, 86, 299, 304, 88, 227, 245]}

 42%|████▏     | 419995/1000000 [3:43:26<3:49:34, 42.11it/s]global step 420000, trans_decision ep_re 233.29617799943077

{"global_step": 420000, "eval_re": [36.66327052078146, 469.3683680415204, 
757.2013402253053, 31.675767750061162, 28.558775103970945, 149.03684956320546, 
345.2986576791484, 37.87394403814597, 203.90949847672482, 273.3753085954436], 
"eval_len": [37, 175, 242, 37, 33, 88, 154, 38, 110, 131]}

 43%|████▎     | 429995/1000000 [3:48:35<3:44:53, 42.24it/s]global step 430000, trans_decision ep_re 344.9442365891898

{"global_step": 430000, "eval_re": [441.2751004240714, 143.55143487230197, 
43.94196178934059, 66.82458934196823, 31.07556033190554, 1100.7845969142502, 
123.26429511083029, 941.6824925389544, 78.4664182352083, 478.5759163330672], 
"eval_len": [161, 95, 42, 60, 35, 337, 85, 318, 59, 189]}

 44%|████▍     | 439998/1000000 [3:53:44<3:39:10, 42.58it/s]global step 440000, trans_decision ep_re 80.91275741752679

{"global_step": 440000, "eval_re": [27.344725793014923, 60.76145182717807, 
152.42930711815853, 224.5362447703455, 154.89158650704147, 37.22856863964248, 
28.641973182446428, 44.92453679225917, 43.672137812788606, 34.697041732392776], 
"eval_len": [33, 51, 92, 108, 91, 39, 36, 42, 43, 38]}

 45%|████▍     | 449998/1000000 [3:59:02<3:34:30, 42.73it/s]global step 450000, trans_decision ep_re 462.56082599165103

{"global_step": 450000, "eval_re": [862.9977394437847, 233.35140108614996, 
1735.1665291223244, 205.80579471288252, 35.02860473508191, 19.134884160391056, 
704.1067351890235, 29.31969062536846, 765.1389138661859, 35.55796697531756], 
"eval_len": [264, 115, 535, 103, 39, 22, 229, 37, 268, 39]}

 46%|████▌     | 459995/1000000 [4:04:06<3:34:23, 41.98it/s]global step 460000, trans_decision ep_re 161.37773387182693

{"global_step": 460000, "eval_re": [24.162097433905547, 42.290757805670864, 
129.17384995781285, 373.90571907894923, 52.720426087482274, 247.45573997845318, 
33.84015690939694, 207.10843061838193, 473.4505469619785, 29.669613886237947], 
"eval_len": [30, 44, 80, 159, 49, 125, 37, 116, 173, 36]}

 47%|████▋     | 469995/1000000 [4:09:14<3:29:45, 42.11it/s]global step 470000, trans_decision ep_re 181.1266091739202

{"global_step": 470000, "eval_re": [506.8745050351371, 169.7546995859347, 
35.848365407225394, 36.53375776160282, 38.52824117473922, 260.38915358374794, 
26.036285860388162, 37.62199039506062, 236.0002038817426, 463.67888905362366], 
"eval_len": [186, 92, 38, 39, 43, 119, 28, 42, 117, 175]}

 48%|████▊     | 479995/1000000 [4:14:20<3:26:04, 42.06it/s]global step 480000, trans_decision ep_re 269.10939420520924

{"global_step": 480000, "eval_re": [472.69650609303943, 210.60256798764692, 
766.9122776168816, 34.92787817012837, 806.8252868722265, 113.0524523352534, 
172.74714190291346, 43.23319578506938, 33.193692108428266, 36.90294318050494], 
"eval_len": [176, 110, 261, 38, 267, 77, 97, 44, 36, 38]}

 49%|████▉     | 489995/1000000 [4:19:29<3:21:19, 42.22it/s]global step 490000, trans_decision ep_re 216.27933182625398

{"global_step": 490000, "eval_re": [374.845962744725, 227.08849353038855, 
216.8004543956332, 614.8561345970513, 24.607639978556858, 356.5488162043562, 
250.34826037530968, 33.74190354767262, 25.45171513932239, 38.503937749523956], 
"eval_len": [154, 113, 114, 225, 34, 148, 115, 34, 27, 42]}

 50%|████▉     | 499995/1000000 [4:24:38<3:17:06, 42.28it/s]global step 500000, trans_decision ep_re 109.70123230365925

{"global_step": 500000, "eval_re": [28.97703546468888, 38.63569250661662, 
36.37135650712465, 749.4062969759916, 32.63843518952156, 56.51899259491598, 
34.132662240453804, 38.020041057816215, 50.00534037815048, 32.3064701213126], 
"eval_len": [36, 41, 38, 254, 37, 52, 39, 38, 47, 36]}

 51%|█████     | 509999/1000000 [4:29:49<3:13:51, 42.13it/s]global step 510000, trans_decision ep_re 412.2643502080471

{"global_step": 510000, "eval_re": [364.73105004709765, 186.77682676040877, 
622.6844287781603, 201.149312166673, 751.5840332736888, 1480.4904710391584, 
231.65677900269878, 36.316259928023506, 203.60588151301283, 43.6484595715492], 
"eval_len": [157, 107, 226, 104, 256, 469, 111, 39, 96, 44]}

 52%|█████▏    | 519999/1000000 [4:35:06<3:10:38, 41.96it/s]global step 520000, trans_decision ep_re 149.3688611377412

{"global_step": 520000, "eval_re": [47.491212908060476, 40.66354656129455, 
198.1589249558145, 52.08221296207455, 434.5515071528792, 465.16008628439914, 
138.67551103702573, 34.82278425690644, 34.615341296568, 47.46748396238938], 
"eval_len": [44, 39, 104, 45, 164, 164, 84, 39, 38, 43]}

 53%|█████▎    | 529999/1000000 [4:40:21<3:05:46, 42.16it/s]global step 530000, trans_decision ep_re 298.9635048146763

{"global_step": 530000, "eval_re": [32.88235484861176, 36.42491299645486, 
840.076602100243, 35.368597471477116, 39.499804240408814, 365.61081903595885, 
1054.5769822219847, 477.15777243547404, 64.21289003031933, 43.824312765830754], 
"eval_len": [39, 39, 291, 39, 40, 154, 347, 179, 56, 42]}

 54%|█████▍    | 539999/1000000 [4:45:36<3:01:53, 42.15it/s]global step 540000, trans_decision ep_re 72.71492853500094

{"global_step": 540000, "eval_re": [54.06612089598614, 57.06419993677639, 
32.777127025096036, 28.863278966493112, 353.14712555317254, 56.73052371633244, 
32.890870054977576, 32.690655084623046, 44.40103958693335, 34.51834452961857], 
"eval_len": [52, 52, 38, 32, 155, 50, 37, 37, 41, 39]}

 55%|█████▍    | 549999/1000000 [4:50:48<2:57:02, 42.36it/s]global step 550000, trans_decision ep_re 144.62757539387056

{"global_step": 550000, "eval_re": [105.74087987379549, 107.52021590554861, 
138.79913939325243, 196.78515754356934, 106.02452330336094, 73.39516106228197, 
140.72461824097962, 73.32176470058515, 394.97438043757336, 108.98991347775858], 
"eval_len": [74, 73, 90, 108, 77, 49, 86, 50, 165, 74]}

 56%|█████▌    | 559998/1000000 [4:56:02<2:52:02, 42.63it/s]global step 560000, trans_decision ep_re 206.55028881404775

{"global_step": 560000, "eval_re": [217.30281231052027, 28.551394018307505, 
443.0095695298699, 464.7384558717729, 517.9382672112796, 146.99553474259173, 
26.118973106004347, 34.34822342112257, 22.364181632379125, 164.13547629662975], 
"eval_len": [115, 31, 176, 172, 191, 91, 28, 39, 26, 97]}

 57%|█████▋    | 569998/1000000 [5:01:12<2:47:36, 42.76it/s]global step 570000, trans_decision ep_re 183.68556868336742

{"global_step": 570000, "eval_re": [85.4718895209503, 79.55630689642287, 
75.59572843523635, 121.95438562967104, 72.97089873737094, 75.5131571151149, 
104.37622965790926, 975.8840672961873, 126.72217166049583, 118.81085188431561], 
"eval_len": [65, 59, 56, 76, 57, 58, 74, 321, 80, 73]}

 58%|█████▊    | 579998/1000000 [5:06:21<2:44:25, 42.57it/s]global step 580000, trans_decision ep_re 209.78125175275014

{"global_step": 580000, "eval_re": [50.485790611719466, 39.915424317233104, 
494.1585181985468, 31.556444193317226, 39.22486526964236, 257.0394057953979, 
761.6875560979063, 33.91317261066728, 341.626877629255, 48.204462803816085], 
"eval_len": [48, 43, 188, 34, 40, 124, 258, 38, 150, 46]}

 59%|█████▉    | 589997/1000000 [5:11:37<2:42:47, 41.98it/s]global step 590000, trans_decision ep_re 426.5214189208691

{"global_step": 590000, "eval_re": [580.8580318049256, 251.41786299462152, 
145.93918821341921, 1029.6552258872632, 525.3730048393967, 41.45383067816817, 
25.32616095412614, 152.7928652355682, 774.146325101736, 738.2516934994661], 
"eval_len": [199, 125, 92, 334, 192, 45, 32, 94, 266, 240]}

 60%|█████▉    | 599995/1000000 [5:17:04<2:38:09, 42.15it/s]global step 600000, trans_decision ep_re 709.2018468964304

{"global_step": 600000, "eval_re": [1570.3081301639704, 197.80093023610038, 
1313.3669952226087, 22.93370455554871, 224.7898361475366, 1256.4100156355944, 
826.2615358843648, 783.8952674487704, 134.8566112835795, 761.3954423862295], 
"eval_len": [465, 106, 399, 27, 113, 396, 288, 266, 84, 260]}

 61%|██████    | 609998/1000000 [5:22:11<2:31:34, 42.88it/s]global step 610000, trans_decision ep_re 283.84826203837036

{"global_step": 610000, "eval_re": [1298.8625356934328, 46.611898052030696, 
38.102855100863934, 291.03258160022665, 38.43793231840218, 179.92806306891546, 
39.821257027426924, 801.1081843814725, 34.720035609467594, 69.85727753146477], 
"eval_len": [420, 44, 41, 133, 42, 97, 40, 270, 40, 64]}

 62%|██████▏   | 619998/1000000 [5:27:22<2:28:38, 42.61it/s]global step 620000, trans_decision ep_re 197.6087056497567

{"global_step": 620000, "eval_re": [571.1829997457853, 147.1268569919945, 
85.4338175228558, 190.0363517484853, 362.80902882274245, 128.4527381704693, 
163.06937027459455, 109.97103708749877, 90.5006052942743, 127.50425083886677], 
"eval_len": [213, 90, 59, 101, 157, 81, 96, 70, 66, 83]}

 63%|██████▎   | 629998/1000000 [5:32:45<2:24:36, 42.65it/s]global step 630000, trans_decision ep_re 358.6049341226459

{"global_step": 630000, "eval_re": [42.50751926167354, 695.3775544945871, 
28.64231011036301, 647.9730370216292, 376.35616059328083, 1095.4945877940988, 
69.90262335837282, 249.1773952504436, 344.65269228044974, 35.96546106156025], 
"eval_len": [42, 234, 29, 206, 154, 324, 63, 119, 156, 38]}

 64%|██████▍   | 639995/1000000 [5:37:46<2:22:11, 42.20it/s]global step 640000, trans_decision ep_re 440.36715281307715

{"global_step": 640000, "eval_re": [42.290695559682185, 1231.3899469799558, 
21.5017273117139, 171.80925305741448, 549.1602008962338, 217.07633616647854, 
1362.617058694531, 216.1622864932276, 413.2258175749597, 178.4382053965746], 
"eval_len": [42, 393, 24, 95, 199, 113, 414, 108, 163, 94]}

 65%|██████▍   | 649995/1000000 [5:42:59<2:18:19, 42.17it/s]global step 650000, trans_decision ep_re 199.99752695078527

{"global_step": 650000, "eval_re": [19.508102973567773, 44.16759746595254, 
35.30601086957356, 30.646791982705583, 48.956286097965624, 727.885069928466, 
29.582362876574326, 50.469952315844004, 976.5384230895734, 36.91467190763011], 
"eval_len": [23, 43, 37, 35, 47, 254, 35, 46, 323, 39]}

 66%|██████▌   | 659995/1000000 [5:48:12<2:14:55, 42.00it/s]global step 660000, trans_decision ep_re 231.7253382954015

{"global_step": 660000, "eval_re": [448.56218689421166, 29.368401248020223, 
804.3075889853467, 37.66062283295709, 218.18636486185926, 35.966921962790934, 
450.07151528747426, 40.300168672734586, 217.23585725193624, 35.5937549566844], 
"eval_len": [176, 34, 279, 38, 110, 38, 175, 40, 108, 39]}

 67%|██████▋   | 669998/1000000 [5:53:21<2:09:12, 42.57it/s]global step 670000, trans_decision ep_re 309.9320672148932

{"global_step": 670000, "eval_re": [282.8298156550918, 343.17462022523495, 
114.14863357277027, 775.5350553074431, 433.1632619370936, 709.4178563103936, 
340.0637953259767, 35.307736801817676, 25.5529200484385, 40.126976964672096], 
"eval_len": [135, 152, 72, 259, 169, 235, 148, 37, 31, 41]}

 68%|██████▊   | 679998/1000000 [5:58:31<2:05:12, 42.60it/s]global step 680000, trans_decision ep_re 183.37196644823314

{"global_step": 680000, "eval_re": [272.51646484962185, 294.4887273332314, 
52.19089579613929, 39.5138231674394, 38.117485201242346, 267.4638099044757, 
266.48831090479183, 273.6439857387652, 31.278595831790586, 298.0175657548336], 
"eval_len": [123, 126, 52, 40, 39, 118, 121, 124, 37, 137]}

 69%|██████▉   | 689998/1000000 [6:03:43<2:00:54, 42.73it/s]global step 690000, trans_decision ep_re 389.74314396108326

{"global_step": 690000, "eval_re": [31.41793515022684, 486.15453984935056, 
208.73842192160433, 202.04507580179407, 586.8365311195613, 456.94930319051826, 
531.479350482822, 288.29024130026266, 538.9666180011895, 566.5534227935032], 
"eval_len": [35, 191, 104, 103, 208, 180, 195, 129, 210, 219]}

 70%|██████▉   | 699998/1000000 [6:08:57<1:56:58, 42.74it/s]global step 700000, trans_decision ep_re 69.09411043916846

{"global_step": 700000, "eval_re": [48.2285921443856, 39.84722220968438, 
38.979170583701055, 39.06513220739504, 38.70897463364795, 38.61770811525018, 
308.5106608032954, 36.67775728539801, 79.33900605994144, 22.966880348985423], 
"eval_len": [46, 39, 41, 40, 41, 40, 136, 40, 62, 27]}

 71%|███████   | 709998/1000000 [6:14:07<1:53:18, 42.65it/s]global step 710000, trans_decision ep_re 296.2029045450768

{"global_step": 710000, "eval_re": [48.61329096997915, 38.673765056445475, 
765.0215920747559, 841.1765702056164, 26.911001619503164, 78.8007728585226, 
476.7656608575637, 229.47077227985017, 77.50748839298595, 379.08813113554544], 
"eval_len": [44, 41, 260, 257, 35, 63, 177, 112, 63, 156]}

 72%|███████▏  | 719998/1000000 [6:19:26<1:49:54, 42.46it/s]global step 720000, trans_decision ep_re 212.40208559531226

{"global_step": 720000, "eval_re": [182.28824359643335, 75.17363162598011, 
40.55870261082543, 183.9981027863737, 47.23067764542786, 491.29667654417625, 
40.07117704501304, 979.2341403551088, 52.44885718570462, 31.72064655807927], 
"eval_len": [102, 61, 42, 101, 45, 190, 43, 323, 50, 35]}

 73%|███████▎  | 729998/1000000 [6:24:40<1:45:31, 42.65it/s]global step 730000, trans_decision ep_re 155.94099064552117

{"global_step": 730000, "eval_re": [130.30801171957404, 121.94949377034004, 
290.8140844976937, 111.88756509319862, 112.75625133786725, 141.3961459924251, 
109.80673349917612, 259.33736221096564, 138.69931335375682, 142.45494498021438],
"eval_len": [88, 76, 125, 77, 72, 85, 78, 120, 84, 84]}

 74%|███████▍  | 739998/1000000 [6:29:53<1:41:41, 42.61it/s]global step 740000, trans_decision ep_re 429.7810757828844

{"global_step": 740000, "eval_re": [215.9709641335182, 437.8126027078224, 
187.73950918113812, 447.95207622537936, 291.0943247352861, 554.6430241900858, 
478.16789354286067, 433.48354847837186, 1052.271258621611, 198.67555601277058], 
"eval_len": [116, 180, 99, 181, 153, 192, 189, 169, 351, 115]}

 75%|███████▍  | 749998/1000000 [6:35:08<1:37:32, 42.71it/s]global step 750000, trans_decision ep_re 184.7635652542926

{"global_step": 750000, "eval_re": [705.0264558450017, 26.500213161562478, 
551.4003307419098, 53.96279126140583, 44.584357347206236, 171.49176462761324, 
190.28633026996357, 32.99931853832969, 24.2851241966615, 47.098966553272014], 
"eval_len": [242, 30, 207, 48, 46, 97, 102, 35, 27, 46]}

 76%|███████▌  | 759996/1000000 [6:40:21<1:33:47, 42.65it/s]global step 760000, trans_decision ep_re 300.39535344661397

{"global_step": 760000, "eval_re": [31.246112426960494, 1269.2809266289948, 
174.52388647835554, 46.095027138359356, 63.928121559099, 417.0671133395838, 
195.18213066269448, 195.0926830119653, 204.25020291919208, 407.2873303009346], 
"eval_len": [34, 375, 98, 45, 57, 166, 107, 102, 107, 177]}

 77%|███████▋  | 769996/1000000 [6:45:33<1:29:18, 42.92it/s]global step 770000, trans_decision ep_re 260.9367617811562

{"global_step": 770000, "eval_re": [54.58510887842972, 42.41357804292109, 
49.733621589842194, 185.4045438899401, 235.39852454728853, 933.4683094555753, 
48.2980980646002, 304.5677668757856, 731.3743113533956, 24.123755113783613], 
"eval_len": [50, 41, 49, 96, 116, 311, 47, 139, 257, 29]}

 78%|███████▊  | 779996/1000000 [6:50:44<1:25:45, 42.76it/s]global step 780000, trans_decision ep_re 220.20595651765652

{"global_step": 780000, "eval_re": [734.4611621762285, 280.7819503094256, 
45.92807660154561, 184.36849745239067, 208.61817807489675, 174.3572165624074, 
42.34985634930112, 214.43072628265892, 32.48361121780353, 284.2802901499068], 
"eval_len": [231, 129, 45, 97, 107, 94, 44, 108, 39, 132]}

 79%|███████▉  | 789996/1000000 [6:55:53<1:22:07, 42.62it/s]global step 790000, trans_decision ep_re 54.03858206555797

{"global_step": 790000, "eval_re": [30.1472161982748, 45.15428127599271, 
54.77434006935044, 28.299487584744874, 28.725561262784023, 46.06277850508206, 
40.924371638227555, 28.518910641594125, 195.5705264316049, 42.20834704792418], 
"eval_len": [35, 45, 47, 33, 36, 47, 41, 35, 103, 43]}

 80%|███████▉  | 799995/1000000 [7:00:58<1:19:07, 42.13it/s]global step 800000, trans_decision ep_re 163.70505354668424

{"global_step": 800000, "eval_re": [23.261321177645904, 35.130269851686705, 
103.49160623608529, 195.31959764091235, 270.5219875931667, 27.743542160672952, 
212.50728154853365, 103.39744879719686, 159.7428753330049, 505.93460512793695], 
"eval_len": [25, 39, 71, 106, 124, 35, 109, 70, 91, 191]}

 81%|████████  | 809995/1000000 [7:06:07<1:15:15, 42.07it/s]global step 810000, trans_decision ep_re 96.89407222294979

{"global_step": 810000, "eval_re": [259.24547155482895, 27.298094058905537, 
30.942240044484198, 39.30273560692613, 34.28150247861412, 291.14500636263074, 
23.54998513704868, 39.044215515542284, 182.17905382915774, 41.95241764135951], 
"eval_len": [129, 34, 39, 39, 35, 133, 27, 43, 94, 41]}

 82%|████████▏ | 819995/1000000 [7:11:13<1:11:24, 42.01it/s]global step 820000, trans_decision ep_re 104.3124411015551

{"global_step": 820000, "eval_re": [51.24510538922744, 27.02419399080869, 
31.461192043185747, 169.98510998411962, 186.74047524511693, 267.3382980294968, 
44.268432126334964, 43.597373646789556, 188.68863927111602, 32.77559128935515], 
"eval_len": [50, 33, 34, 104, 101, 136, 48, 44, 102, 35]}

 83%|████████▎ | 829995/1000000 [7:16:19<1:43:09, 27.46it/s]global step 830000, trans_decision ep_re 154.92818799525674

{"global_step": 830000, "eval_re": [32.32094330145889, 197.636197329973, 
200.6454548083023, 148.1688791092489, 199.51320430492163, 37.05041542603514, 
188.9869704833558, 186.8539799250305, 168.101340929388, 190.00449433485306], 
"eval_len": [36, 107, 103, 86, 104, 42, 97, 103, 97, 102]}

 84%|████████▍ | 839995/1000000 [7:21:25<1:03:19, 42.12it/s]global step 840000, trans_decision ep_re 168.38874004010586

{"global_step": 840000, "eval_re": [122.84407516472452, 481.239870488412, 
46.332791150365956, 193.62428489322116, 366.3066344721564, 38.67696565325253, 
19.519962094944322, 175.62938987744764, 204.34723529964162, 35.36619130689253], 
"eval_len": [79, 186, 43, 101, 155, 41, 24, 95, 108, 41]}

 85%|████████▍ | 849999/1000000 [7:26:30<59:29, 42.02it/s]global step 850000, trans_decision ep_re 328.1433345752362

{"global_step": 850000, "eval_re": [478.43309720298384, 502.9667547817361, 
481.6451194096829, 739.4894142801145, 118.4535566909351, 243.8198441450264, 
39.285743536082634, 163.0050213760245, 177.20008722853692, 337.1347071012392], 
"eval_len": [187, 191, 180, 262, 78, 121, 41, 92, 98, 148]}

 86%|████████▌ | 859999/1000000 [7:31:40<55:34, 41.98it/s]global step 860000, trans_decision ep_re 249.79740370496205

{"global_step": 860000, "eval_re": [174.80893262646575, 720.0242912785361, 
530.5604232714845, 35.9272709072862, 40.24396202368951, 475.22465468495807, 
240.43037803547767, 204.8643615493784, 32.50902314937187, 43.380739522972405], 
"eval_len": [101, 242, 193, 35, 41, 186, 117, 104, 36, 43]}

 87%|████████▋ | 869999/1000000 [7:36:49<51:50, 41.79it/s]global step 870000, trans_decision ep_re 383.3995906701839

{"global_step": 870000, "eval_re": [27.481001220358923, 340.47992767892856, 
491.8019700736297, 1297.117761612877, 198.90227008830652, 27.607878707775654, 
208.8938533751297, 191.1061205347057, 886.0784889884262, 164.52663442170052], 
"eval_len": [32, 138, 203, 385, 113, 30, 112, 101, 275, 93]}

 88%|████████▊ | 879997/1000000 [7:42:01<47:57, 41.70it/s]global step 880000, trans_decision ep_re 251.83828864249296

{"global_step": 880000, "eval_re": [408.8521855902214, 172.08225040100228, 
58.11422007546142, 433.4810320216735, 199.2658703051547, 829.5751704174215, 
41.618004449909286, 141.38864950762178, 184.54926284225414, 49.45624081420982], 
"eval_len": [164, 93, 49, 163, 109, 276, 41, 89, 104, 48]}

 89%|████████▉ | 889998/1000000 [7:47:12<43:20, 42.30it/s]global step 890000, trans_decision ep_re 310.105836691382

{"global_step": 890000, "eval_re": [439.7599930411088, 251.2216570003976, 
453.12128830277544, 466.10349941278906, 185.40257436275832, 1064.0795484846578, 
26.248191914183604, 151.69468411359176, 32.407611249950286, 31.019319031607647],
"eval_len": [169, 124, 170, 183, 103, 327, 35, 91, 38, 37]}

 90%|████████▉ | 899998/1000000 [7:52:24<39:36, 42.09it/s]global step 900000, trans_decision ep_re 275.84856379137153

{"global_step": 900000, "eval_re": [352.3104388195465, 108.12905020198045, 
163.0007553491638, 200.7444607377214, 102.66603744177942, 225.44290377811706, 
261.1620236093152, 100.58190042055811, 169.70723413000823, 1074.7408334255251], 
"eval_len": [157, 78, 92, 116, 72, 127, 134, 72, 101, 353]}

 91%|█████████ | 909998/1000000 [7:57:35<35:28, 42.28it/s]global step 910000, trans_decision ep_re 101.9545767369243

{"global_step": 910000, "eval_re": [207.51703421084417, 155.35329179413867, 
159.35198126872388, 122.52589998434162, 88.69126603119123, 37.50131492753944, 
46.60099888881625, 105.72216414636267, 57.554059775937986, 38.7277563413471], 
"eval_len": [106, 91, 94, 79, 64, 37, 44, 73, 51, 38]}

 92%|█████████▏| 919997/1000000 [8:02:44<31:56, 41.75it/s]global step 920000, trans_decision ep_re 249.3377950967074

{"global_step": 920000, "eval_re": [369.78587049587236, 730.1254962785482, 
41.70867698741121, 44.159280427284216, 304.4829581198063, 143.01163893539552, 
113.56205376273893, 45.378966116720946, 318.9432759459595, 382.219733897337], 
"eval_len": [168, 246, 44, 48, 146, 79, 74, 44, 143, 165]}

 93%|█████████▎| 929995/1000000 [8:08:06<28:01, 41.64it/s]global step 930000, trans_decision ep_re 315.6365632780964

{"global_step": 930000, "eval_re": [49.15780934643005, 801.6725927678194, 
970.6350609065444, 232.23970099878352, 109.96794614904474, 176.0181403951137, 
37.10955582498359, 719.0531088110517, 25.94739995222025, 34.564317628972795], 
"eval_len": [47, 271, 287, 114, 74, 101, 43, 236, 32, 40]}

 94%|█████████▍| 939999/1000000 [8:13:12<23:53, 41.86it/s]global step 940000, trans_decision ep_re 281.89132973435267

{"global_step": 940000, "eval_re": [46.67152399533681, 41.67302928970765, 
702.4403879938257, 689.6177552345462, 47.41338373813375, 41.730769008370785, 
51.893850259233695, 457.39725873248415, 702.8715268961043, 37.20381219578359], 
"eval_len": [44, 45, 235, 234, 46, 43, 48, 172, 236, 40]}

 95%|█████████▍| 949999/1000000 [8:18:28<20:01, 41.61it/s]global step 950000, trans_decision ep_re 270.20267938283405

{"global_step": 950000, "eval_re": [69.06873567812883, 430.3269986165512, 
215.83880383881456, 37.695473395948035, 32.559592683465176, 32.254257473734256, 
34.98956183289679, 1770.4276687522224, 39.85791158121127, 39.007789975368226], 
"eval_len": [61, 169, 112, 40, 36, 34, 39, 513, 44, 41]}

 96%|█████████▌| 959996/1000000 [8:23:43<15:50, 42.09it/s]global step 960000, trans_decision ep_re 386.36689468653714

{"global_step": 960000, "eval_re": [43.941600871232914, 441.46391487154426, 
685.7773751262804, 69.5428832602695, 767.0592100671195, 203.8324636034604, 
459.1704131214285, 473.7514140512045, 37.93705738945138, 681.1926145033801], 
"eval_len": [45, 178, 253, 58, 273, 106, 185, 183, 43, 230]}

 97%|█████████▋| 969996/1000000 [8:29:03<11:50, 42.21it/s]global step 970000, trans_decision ep_re 366.7451420545602

{"global_step": 970000, "eval_re": [66.56654538892863, 327.0995917255849, 
1081.4435245119594, 190.50793843168626, 232.82625101215925, 24.182252268064975, 
28.0948836625155, 1020.1183452781414, 205.54267099146603, 491.06941727509565], 
"eval_len": [58, 148, 336, 102, 116, 32, 30, 325, 113, 189]}

 98%|█████████▊| 979996/1000000 [8:34:23<07:53, 42.24it/s]global step 980000, trans_decision ep_re 111.81384013880918

{"global_step": 980000, "eval_re": [92.98704168338575, 50.37389436757854, 
195.32613300345747, 58.44621978436968, 33.23661886942539, 29.758021801810084, 
205.96363003108405, 30.619952083462813, 204.74494615123248, 216.68194361228538],
"eval_len": [72, 48, 103, 50, 37, 34, 111, 37, 112, 112]}

 99%|█████████▉| 989996/1000000 [8:39:39<03:56, 42.30it/s]global step 990000, trans_decision ep_re 441.72339143632814

{"global_step": 990000, "eval_re": [712.6200042700322, 999.6745383094416, 
211.56563027736908, 432.1455709706032, 204.8720361715398, 183.81352528610188, 
702.2931594264061, 27.811589430253772, 28.820908012610133, 913.6169522089232], 
"eval_len": [242, 316, 110, 172, 104, 95, 232, 33, 34, 288]}

100%|█████████▉| 999998/1000000 [8:45:09<00:00, 34.87it/s]global step 1000000, trans_decision ep_re 82.90833077762306

{"global_step": 1000000, "eval_re": [38.20119038395569, 226.2202516194647, 
236.44875320826966, 34.375867213394606, 21.48147702543883, 137.9642198916712, 
31.319337919190364, 31.255988295726294, 37.404625292670936, 34.411596926448254],
"eval_len": [38, 117, 117, 37, 25, 84, 36, 37, 37, 39]}

100%|██████████| 1000000/1000000 [8:45:19<00:00, 31.73it/s]
