
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.1
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [05:40<12:58:52, 21.18it/s]global step 10000, trans_decision ep_re 36.735273331267976

{"global_step": 10000, "eval_re": [12.658582914497563, 29.20280441046175, 
28.63504361598398, 24.957589044293265, 15.120081560207428, 10.69708470554677, 
42.82237720544101, 95.30998204854663, 45.80333298107186, 62.14585482662945], 
"eval_len": [17, 33, 29, 29, 19, 13, 44, 64, 59, 60]}

  2%|▏         | 19999/1000000 [16:30<13:01:22, 20.90it/s]global step 20000, trans_decision ep_re 69.06918836593833

{"global_step": 20000, "eval_re": [104.81760529534883, 19.50203823256609, 
17.656618850006147, 106.18668997402104, 25.49093025348823, 106.74963180896567, 
81.00841573444933, 126.2998121195275, 82.1638080943427, 20.816333296667775], 
"eval_len": [101, 30, 21, 70, 31, 83, 76, 96, 69, 20]}

  3%|▎         | 29998/1000000 [27:20<12:38:42, 21.31it/s]global step 30000, trans_decision ep_re 69.24457132614303

{"global_step": 30000, "eval_re": [136.1319403246872, 19.819379478508377, 
27.980748063772772, 53.102223247618035, 24.947878765220427, 81.1967800332103, 
77.38616224495009, 81.35391337013175, 31.014155822588545, 159.51253191074287], 
"eval_len": [92, 26, 34, 64, 27, 74, 66, 66, 32, 96]}

  4%|▍         | 39999/1000000 [38:00<12:37:30, 21.12it/s]global step 40000, trans_decision ep_re 89.51103300403307

{"global_step": 40000, "eval_re": [99.64974639073407, 159.95477799957877, 
28.62634478097972, 31.09611276194811, 102.43875039517214, 142.06030419500078, 
76.15818943418078, 27.691665822144035, 110.28475385097367, 117.14968440961867], 
"eval_len": [74, 107, 34, 29, 89, 143, 80, 28, 113, 82]}

  5%|▍         | 49999/1000000 [49:00<12:39:07, 20.86it/s]global step 50000, trans_decision ep_re 72.4844617032452

{"global_step": 50000, "eval_re": [25.023059315886297, 55.08834200545634, 
105.01484227751355, 190.0472981463191, 21.74744693397075, 28.254098966557958, 
17.739577355235316, 23.14850673951914, 244.6603635908588, 14.121081701134765], 
"eval_len": [34, 53, 83, 119, 27, 28, 20, 22, 237, 16]}

  6%|▌         | 59998/1000000 [59:50<12:11:52, 21.41it/s]global step 60000, trans_decision ep_re 26.32598629916249

{"global_step": 60000, "eval_re": [15.380604990785258, 31.559277162294656, 
20.64052773220804, 35.918028648273236, 30.09076354649172, 18.97250490197721, 
23.279585994586895, 21.503507307755903, 28.572835696316552, 37.34222701093546], 
"eval_len": [24, 30, 32, 33, 28, 29, 26, 27, 30, 45]}

  7%|▋         | 69998/1000000 [1:10:30<11:59:48, 21.53it/s]global step 70000, trans_decision ep_re 95.9745947659143

{"global_step": 70000, "eval_re": [220.48667405832336, 155.20427269996225, 
170.4755351025869, 21.096024860944798, 26.463414783180497, 29.694780815603533, 
139.88373850370462, 149.96763494964955, 23.363292252424873, 23.110579632762626],
"eval_len": [140, 92, 95, 32, 30, 34, 97, 100, 28, 37]}

  8%|▊         | 79999/1000000 [1:21:00<11:58:35, 21.34it/s]global step 80000, trans_decision ep_re 67.20232485268795

{"global_step": 80000, "eval_re": [22.426818295796807, 75.50758941989639, 
159.18445240128628, 83.1385157311334, 73.06341574569537, 87.36417645438598, 
27.302159874124072, 101.06658818162515, 20.125835329210727, 22.843697093725318],
"eval_len": [34, 77, 100, 57, 54, 59, 32, 84, 24, 26]}

  9%|▉         | 89999/1000000 [1:31:21<11:58:59, 21.09it/s]global step 90000, trans_decision ep_re 51.26590541076009

{"global_step": 90000, "eval_re": [87.36524556679781, 17.673753842973618, 
15.839146274106051, 58.93976324117232, 15.18778027681832, 18.837499830308698, 
20.421493462188348, 54.59005865878427, 178.9008242185785, 44.90348873587295], 
"eval_len": [70, 22, 24, 62, 21, 21, 27, 49, 109, 50]}

 10%|▉         | 99999/1000000 [1:42:10<11:39:48, 21.43it/s]global step 100000, trans_decision ep_re 48.20549294041007

{"global_step": 100000, "eval_re": [39.46816955790699, 107.22184977153253, 
34.22173638353958, 46.593222398707624, 30.504003235082997, 51.85387213336517, 
22.8969519690509, 59.05992212386853, 49.45349512852883, 40.78170670251756], 
"eval_len": [44, 64, 45, 56, 30, 51, 26, 57, 51, 44]}

 11%|█         | 109998/1000000 [1:52:40<11:28:58, 21.53it/s]global step 110000, trans_decision ep_re 79.00225930106153

{"global_step": 110000, "eval_re": [21.452283840009425, 25.487005228282396, 
301.17137455654915, 22.587057056898136, 24.193761475755363, 19.113424545873297, 
108.11576000857083, 25.136034211430562, 171.87343354079476, 70.89245854645134], 
"eval_len": [29, 27, 134, 25, 31, 26, 77, 27, 98, 69]}

 12%|█▏        | 119997/1000000 [2:03:10<11:23:31, 21.46it/s]global step 120000, trans_decision ep_re 38.21057718944979

{"global_step": 120000, "eval_re": [20.6445537092957, 61.86776313715772, 
25.421077333269093, 93.0344214799236, 28.715464198556795, 19.66385695230233, 
17.656187760739115, 19.861069790019496, 18.60663832798858, 76.63473920524545], 
"eval_len": [25, 56, 44, 76, 29, 23, 21, 25, 28, 69]}

 13%|█▎        | 129999/1000000 [2:13:40<11:14:41, 21.49it/s]global step 130000, trans_decision ep_re 30.13485674356457

{"global_step": 130000, "eval_re": [23.787732836248708, 13.234120525661295, 
16.68529140934648, 22.374700569387855, 55.51589157041575, 82.64452733549365, 
20.491858859626113, 23.550841664916646, 20.071200877944097, 22.992401786605114],
"eval_len": [37, 16, 21, 31, 62, 66, 28, 28, 26, 28]}

 14%|█▍        | 139999/1000000 [2:24:10<11:06:01, 21.52it/s]global step 140000, trans_decision ep_re 50.49374487794747

{"global_step": 140000, "eval_re": [22.66092315484175, 15.490913976411917, 
17.59032460110829, 25.43492834487491, 162.30554403012445, 170.126392666799, 
14.822336167166604, 21.517682464657234, 29.10255893191777, 25.885844441572793], 
"eval_len": [27, 19, 23, 29, 91, 94, 24, 23, 32, 23]}

 15%|█▍        | 149999/1000000 [2:34:40<11:00:02, 21.46it/s]global step 150000, trans_decision ep_re 77.06426146551568

{"global_step": 150000, "eval_re": [19.09099862573515, 34.06194478118162, 
31.441385018395632, 26.788320171946033, 28.829637746428187, 165.15323074764055, 
253.7019672422501, 28.855894642035256, 159.92616107899187, 22.7930746005525], 
"eval_len": [22, 31, 32, 27, 37, 110, 160, 31, 89, 28]}

 16%|█▌        | 159998/1000000 [2:45:20<10:48:45, 21.58it/s]global step 160000, trans_decision ep_re 83.00519757655448

{"global_step": 160000, "eval_re": [165.48430519553287, 94.89457270967058, 
20.51536042139895, 21.738523273926855, 28.28878110760636, 17.08621841577067, 
156.42293583596899, 273.1090667227206, 32.28618869114448, 20.226023391804357], 
"eval_len": [100, 74, 26, 25, 36, 20, 108, 155, 32, 24]}

 17%|█▋        | 169999/1000000 [2:55:50<10:47:09, 21.38it/s]global step 170000, trans_decision ep_re 71.44359380809968

{"global_step": 170000, "eval_re": [40.51940660707911, 19.777425549342944, 
214.98820604687825, 24.001135520416526, 58.734967226807626, 47.47232574486973, 
26.953204614247827, 157.27660795723475, 14.263855564246464, 110.44880324987366],
"eval_len": [45, 24, 117, 32, 52, 51, 26, 99, 20, 83]}

 18%|█▊        | 179999/1000000 [3:06:10<10:33:57, 21.56it/s]global step 180000, trans_decision ep_re 38.03017222404651

{"global_step": 180000, "eval_re": [55.61664490080012, 33.35302351370504, 
31.36069517215961, 20.778898126007963, 51.95137153026391, 47.02139825323651, 
50.55706161145311, 43.4449631910892, 23.367087037309556, 22.850578904440095], 
"eval_len": [51, 45, 33, 34, 45, 49, 53, 47, 35, 31]}

 19%|█▉        | 189999/1000000 [3:17:00<10:27:33, 21.51it/s]global step 190000, trans_decision ep_re 87.58599842060008

{"global_step": 190000, "eval_re": [180.69020502396833, 16.55032130715909, 
17.734047313446887, 124.46793499470797, 46.47215247590538, 20.897368475290023, 
30.82415275139353, 35.94451938077706, 16.323947602695768, 385.9553348806566], 
"eval_len": [118, 18, 26, 95, 44, 27, 38, 43, 22, 172]}

 20%|█▉        | 199998/1000000 [3:27:30<10:12:15, 21.78it/s]global step 200000, trans_decision ep_re 30.267338959842277

{"global_step": 200000, "eval_re": [36.45582632797119, 28.82941260775249, 
23.852482742272084, 18.010815673237694, 25.13064601905299, 33.90966122765202, 
31.448861181406393, 39.166630229029884, 21.157858873241974, 44.71119471680601], 
"eval_len": [42, 39, 28, 18, 31, 42, 42, 44, 25, 41]}

 21%|██        | 209998/1000000 [3:38:00<10:04:37, 21.78it/s]global step 210000, trans_decision ep_re 51.11519885604484

{"global_step": 210000, "eval_re": [24.922720772606766, 32.81509899432094, 
97.1213066001315, 28.344335340956377, 17.871851619547684, 54.77415438472504, 
26.884149262626142, 22.12098852070334, 165.52650396220608, 40.7708791026246], 
"eval_len": [31, 40, 81, 34, 21, 59, 38, 30, 131, 53]}

 22%|██▏       | 219998/1000000 [3:48:30<9:56:21, 21.80it/s]global step 220000, trans_decision ep_re 60.98114950209134

{"global_step": 220000, "eval_re": [25.83976716103197, 155.42910011972248, 
16.34599160871598, 23.974691100051256, 205.69848269934968, 38.09864268241753, 
25.1923886971198, 24.90113715390615, 48.66390679131739, 45.66738700728114], 
"eval_len": [27, 85, 20, 30, 107, 52, 27, 28, 49, 47]}

 23%|██▎       | 229999/1000000 [3:59:00<9:55:58, 21.53it/s]global step 230000, trans_decision ep_re 81.11308765489727

{"global_step": 230000, "eval_re": [111.5387877763303, 172.79995112595694, 
47.93718410159271, 31.260651889477696, 42.594848984108694, 56.37214241950938, 
21.213791216903374, 33.00889570129118, 36.37313485373438, 258.03148848006816], 
"eval_len": [86, 104, 51, 33, 44, 54, 25, 35, 46, 131]}

 24%|██▍       | 239998/1000000 [4:09:30<9:42:28, 21.75it/s]global step 240000, trans_decision ep_re 42.02101508232563

{"global_step": 240000, "eval_re": [17.661052400977542, 25.98951139262034, 
21.39760473446773, 47.944238968487255, 25.802994627221963, 71.83536559644378, 
16.59859776831193, 155.62288910463178, 19.379634452030743, 17.97826177806323], 
"eval_len": [19, 26, 25, 48, 27, 62, 21, 91, 27, 23]}

 25%|██▍       | 249998/1000000 [4:20:00<9:33:22, 21.80it/s]global step 250000, trans_decision ep_re 52.023407759987116

{"global_step": 250000, "eval_re": [23.99539719811146, 31.51207402626247, 
19.21991119884431, 33.86773660974414, 31.132725240789288, 28.084004822006044, 
39.02371437050556, 252.7313110795333, 21.941501565663227, 38.725701488411346], 
"eval_len": [29, 43, 30, 31, 35, 42, 33, 138, 29, 43]}

 26%|██▌       | 259998/1000000 [4:30:30<9:25:46, 21.80it/s]global step 260000, trans_decision ep_re 57.17153141201085

{"global_step": 260000, "eval_re": [13.498936871253502, 23.299202677811643, 
26.348685115908136, 167.41074139755756, 17.39710059394314, 27.80270812543057, 
228.45815772710708, 30.567371615179166, 20.38823654889008, 16.544173447027713], 
"eval_len": [23, 24, 42, 113, 20, 30, 114, 32, 28, 20]}

 27%|██▋       | 269998/1000000 [4:41:00<9:22:41, 21.62it/s]global step 270000, trans_decision ep_re 82.21465605106513

{"global_step": 270000, "eval_re": [305.330633033184, 68.29371846928291, 
19.949838098682267, 38.94125547219928, 18.798690730774616, 35.742587833041796, 
58.952695310714, 90.42950370320538, 27.942197171685685, 157.76544068788135], 
"eval_len": [143, 56, 25, 43, 27, 40, 51, 66, 43, 85]}

 28%|██▊       | 279998/1000000 [4:51:30<9:11:24, 21.76it/s]global step 280000, trans_decision ep_re 59.19668330270887

{"global_step": 280000, "eval_re": [142.7652308048005, 26.750378971795424, 
151.20352101131132, 15.778088570856736, 36.49050244343577, 22.485201693917475, 
107.17712280084184, 36.411294931802146, 27.13645727489744, 25.76903452343008], 
"eval_len": [116, 30, 83, 18, 31, 27, 71, 33, 32, 35]}

 29%|██▉       | 289997/1000000 [5:02:00<9:10:20, 21.50it/s]global step 290000, trans_decision ep_re 37.93971955984862

{"global_step": 290000, "eval_re": [60.810138001576945, 52.59382096697989, 
34.46299084664602, 27.39624080557944, 50.056467189698964, 31.7366050819549, 
33.04544244890222, 25.09538533055535, 27.960697115246408, 36.239407811345956], 
"eval_len": [65, 55, 43, 29, 52, 31, 44, 32, 36, 36]}

 30%|██▉       | 299999/1000000 [5:12:30<8:59:32, 21.62it/s]global step 300000, trans_decision ep_re 44.79613362528987

{"global_step": 300000, "eval_re": [18.214479905894194, 22.434817014103857, 
90.74219780661423, 102.71492570326706, 25.018363469466745, 17.9916941213171, 
27.337216942179793, 97.3063664842417, 23.24959414447075, 22.951680661343286], 
"eval_len": [27, 32, 71, 73, 26, 27, 34, 71, 33, 29]}

 31%|███       | 309999/1000000 [5:23:00<8:53:30, 21.56it/s]global step 310000, trans_decision ep_re 83.56835287177584

{"global_step": 310000, "eval_re": [465.5342519481621, 22.872914962959698, 
82.27986652463849, 78.81500937406152, 29.96482154730182, 31.482922561286657, 
30.253001968617784, 28.943007855296315, 36.248571656662904, 29.289160318771206],
"eval_len": [190, 33, 68, 94, 41, 33, 33, 27, 46, 32]}

 32%|███▏      | 319999/1000000 [5:33:40<8:50:44, 21.35it/s]global step 320000, trans_decision ep_re 75.14999092122157

{"global_step": 320000, "eval_re": [29.343709349113084, 57.676844521121645, 
114.59464710325072, 25.375442016414333, 174.78014657438163, 18.917717640975187, 
23.524727136558802, 102.73602231240321, 176.2192006301267, 28.331451927870326], 
"eval_len": [34, 53, 87, 29, 95, 23, 30, 74, 101, 28]}

 33%|███▎      | 329998/1000000 [5:44:10<8:33:39, 21.74it/s]global step 330000, trans_decision ep_re 61.99610544068099

{"global_step": 330000, "eval_re": [26.591136574015174, 59.34184235181682, 
17.32617997868411, 138.65591405033652, 163.36468861478562, 31.416837250128072, 
80.21247093356217, 32.02999714648035, 27.00410963872872, 44.017877868272144], 
"eval_len": [30, 66, 20, 93, 109, 35, 72, 38, 33, 52]}

 34%|███▍      | 339997/1000000 [5:54:40<8:33:03, 21.44it/s]global step 340000, trans_decision ep_re 53.833625750216456

{"global_step": 340000, "eval_re": [19.702812119520765, 67.88876308719387, 
19.056179658905215, 34.25186028786908, 23.066668932826488, 16.019204605569556, 
17.27707477531306, 232.82216669352258, 31.79448271086833, 76.45704463057561], 
"eval_len": [29, 64, 24, 37, 28, 20, 24, 137, 32, 67]}

 35%|███▍      | 349999/1000000 [6:05:10<8:24:50, 21.46it/s]global step 350000, trans_decision ep_re 119.15387823637653

{"global_step": 350000, "eval_re": [292.36110637906665, 105.1343773006306, 
44.97077504931952, 266.9217537343067, 16.014368158343952, 52.0663709253855, 
166.9432498109933, 21.061712418232617, 208.24628663373332, 17.818781953753014], 
"eval_len": [124, 79, 52, 137, 23, 52, 104, 24, 129, 31]}

 36%|███▌      | 359999/1000000 [6:15:31<8:18:20, 21.40it/s]global step 360000, trans_decision ep_re 50.64717893884166

{"global_step": 360000, "eval_re": [20.940107555583108, 167.94260054133508, 
51.761613351507165, 126.87781635709386, 14.749905947775227, 29.143839967085533, 
28.387675490904414, 18.99208606011817, 29.13400706466544, 18.542137052348664], 
"eval_len": [21, 103, 56, 94, 18, 28, 30, 24, 28, 23]}

 37%|███▋      | 369998/1000000 [6:26:20<8:05:13, 21.64it/s]global step 370000, trans_decision ep_re 35.21984619211706

{"global_step": 370000, "eval_re": [74.41155554736764, 25.69938797314524, 
71.18318012517203, 14.596424513306031, 31.154396620297604, 48.424738638221555, 
19.516399433460325, 16.357407240662305, 28.0808753684392, 22.77409646109865], 
"eval_len": [71, 38, 64, 20, 28, 47, 26, 24, 33, 41]}

 38%|███▊      | 379998/1000000 [6:36:50<7:55:53, 21.71it/s]global step 380000, trans_decision ep_re 64.74493277518022

{"global_step": 380000, "eval_re": [41.09837307567892, 27.47238854570371, 
60.24322036698151, 137.31140307389353, 33.99569454361768, 27.844187651505337, 
18.571390087163955, 83.77532892593393, 196.6429541259169, 20.49438735540672], 
"eval_len": [44, 31, 51, 78, 44, 32, 29, 69, 121, 25]}

 39%|███▉      | 389999/1000000 [6:47:20<7:55:32, 21.38it/s]global step 390000, trans_decision ep_re 38.61895837826386

{"global_step": 390000, "eval_re": [27.41923077787363, 23.863760711180152, 
36.41020373027966, 31.78396831771189, 68.09881340754762, 16.710691648651434, 
82.24284616593938, 30.661492154357813, 21.84590088757902, 47.152675981518044], 
"eval_len": [29, 28, 46, 34, 55, 24, 69, 40, 26, 53]}

 40%|███▉      | 399999/1000000 [6:57:50<7:46:36, 21.43it/s]global step 400000, trans_decision ep_re 58.88618424402696

{"global_step": 400000, "eval_re": [26.269685952742037, 131.16514923070525, 
182.66649097727355, 27.004492968944604, 21.52552056856676, 27.58067495093815, 
74.9765437712316, 36.85408730232513, 39.34020456851381, 21.478992149028777], 
"eval_len": [32, 88, 123, 38, 33, 33, 55, 41, 43, 26]}

 41%|████      | 409999/1000000 [7:08:20<7:36:45, 21.53it/s]global step 410000, trans_decision ep_re 40.510771993966344

{"global_step": 410000, "eval_re": [46.15499387731147, 42.01917033122806, 
16.091207119660567, 26.305327814570763, 22.361648230562565, 15.895813947064536, 
51.470881591120175, 60.765200095250414, 67.47646731462181, 56.56700961827299], 
"eval_len": [48, 40, 21, 29, 29, 19, 50, 55, 52, 45]}

 42%|████▏     | 419999/1000000 [7:18:41<7:31:59, 21.39it/s]global step 420000, trans_decision ep_re 26.75120248874841

{"global_step": 420000, "eval_re": [18.8508864126944, 22.082003602112323, 
32.535041459685374, 15.762576213251146, 20.877147442710655, 24.504537779439165, 
42.2089644342933, 24.32592806526659, 23.224530260612497, 43.140409217418714], 
"eval_len": [20, 24, 32, 23, 25, 24, 42, 28, 25, 47]}

 43%|████▎     | 429999/1000000 [7:29:12<7:23:17, 21.43it/s]global step 430000, trans_decision ep_re 28.161829582242415

{"global_step": 430000, "eval_re": [38.8747261626112, 17.363964891956943, 
31.002449815264224, 28.714676039587967, 16.335377964302744, 24.995354756594917, 
22.828786033662936, 39.163870827755424, 27.02445514624943, 35.314634184438354], 
"eval_len": [36, 21, 31, 34, 20, 27, 23, 41, 35, 35]}

 44%|████▍     | 439998/1000000 [7:40:00<7:12:26, 21.58it/s]global step 440000, trans_decision ep_re 34.640927920243335

{"global_step": 440000, "eval_re": [27.783127685383214, 43.57726831610032, 
43.45862844817478, 18.458001202876087, 32.749544949837315, 22.623934390898192, 
25.83888195542689, 24.60651758079486, 22.3812994748664, 84.93207519807532], 
"eval_len": [36, 44, 50, 26, 32, 33, 36, 26, 25, 79]}

 45%|████▍     | 449998/1000000 [7:50:30<7:04:07, 21.61it/s]global step 450000, trans_decision ep_re 47.833680404980065

{"global_step": 450000, "eval_re": [18.320776045336377, 30.667982336875948, 
16.676008109261378, 25.96042094218519, 31.420223139751766, 23.252860653004852, 
33.32903306263737, 159.5231827331628, 17.10463228145413, 122.08168474613083], 
"eval_len": [37, 39, 20, 28, 36, 24, 39, 115, 18, 83]}

 46%|████▌     | 459999/1000000 [8:01:00<6:56:25, 21.61it/s]global step 460000, trans_decision ep_re 48.244973607374206

{"global_step": 460000, "eval_re": [39.77150267734797, 31.34563879211995, 
27.425418971616203, 24.966311432719337, 45.006838977623254, 180.04622662940739, 
28.761682910838992, 27.899870073818146, 31.766718657280716, 45.459526950970066],
"eval_len": [36, 44, 30, 27, 46, 107, 29, 37, 30, 44]}

 47%|████▋     | 469999/1000000 [8:11:30<6:51:10, 21.48it/s]global step 470000, trans_decision ep_re 92.44276835169893

{"global_step": 470000, "eval_re": [72.60232366761704, 195.51656388164415, 
20.656437895905828, 184.4828540235828, 19.113125794119078, 33.94458233703344, 
102.15362353256351, 17.84706179089061, 255.10009008932542, 23.011020504307325], 
"eval_len": [58, 148, 27, 103, 29, 34, 83, 21, 129, 27]}

 48%|████▊     | 479999/1000000 [8:21:50<6:43:22, 21.49it/s]global step 480000, trans_decision ep_re 44.978269243943

{"global_step": 480000, "eval_re": [47.73996917649188, 21.071500997293636, 
31.928299777944435, 27.120753556502606, 27.680955905613846, 32.70622447032073, 
66.764432096688, 79.48072128428633, 84.14710750015117, 31.142727674137323], 
"eval_len": [48, 26, 41, 33, 38, 35, 61, 68, 67, 35]}

 49%|████▉     | 489999/1000000 [8:32:40<6:37:35, 21.38it/s]global step 490000, trans_decision ep_re 61.8484093218834

{"global_step": 490000, "eval_re": [29.005602015970755, 234.11112136360902, 
23.420689056701484, 31.41195941400425, 31.765900774634378, 105.00517823340778, 
16.966884558130147, 18.900376813622582, 27.028078400159075, 100.86830258859462],
"eval_len": [33, 161, 24, 29, 32, 86, 19, 21, 33, 74]}

 50%|████▉     | 499998/1000000 [8:43:10<6:22:40, 21.78it/s]global step 500000, trans_decision ep_re 74.49042926622016

{"global_step": 500000, "eval_re": [47.231907428973535, 238.25221018790538, 
30.115445044221975, 182.91513666079643, 74.61994049668914, 15.865767341383233, 
93.97997959487323, 20.797453229609253, 18.187418379612012, 22.93903429813736], 
"eval_len": [49, 130, 30, 126, 65, 19, 70, 32, 24, 29]}

 51%|█████     | 509999/1000000 [8:53:40<6:20:28, 21.46it/s]global step 510000, trans_decision ep_re 93.04129674769287

{"global_step": 510000, "eval_re": [28.84764804241071, 47.02155823936223, 
19.314464107977518, 27.788619750969563, 21.233841163693807, 31.906630336211325, 
279.2836540086354, 29.695761535395032, 415.9346209888869, 29.386169303386236], 
"eval_len": [30, 54, 27, 32, 32, 44, 124, 42, 190, 30]}

 52%|█████▏    | 519999/1000000 [9:04:10<6:13:48, 21.40it/s]global step 520000, trans_decision ep_re 72.87949246622051

{"global_step": 520000, "eval_re": [38.29006156459949, 322.0272358289726, 
19.09338082076489, 26.72136285119421, 23.680879672840035, 210.28435106690165, 
22.91484619132563, 20.623243740522607, 24.581033118697004, 20.578529806387042], 
"eval_len": [49, 150, 21, 30, 25, 119, 29, 31, 24, 24]}

 53%|█████▎    | 529999/1000000 [9:14:31<6:01:27, 21.67it/s]global step 530000, trans_decision ep_re 80.91298570913956

{"global_step": 530000, "eval_re": [160.15777642770513, 20.217016790625067, 
33.95483963975832, 17.99746558557408, 18.027699757895554, 192.231410657613, 
17.217944736295916, 307.83371131517697, 24.747922160972422, 16.744070019779116],
"eval_len": [90, 32, 47, 25, 21, 108, 28, 150, 23, 32]}

 54%|█████▍    | 539998/1000000 [9:25:20<5:54:35, 21.62it/s]global step 540000, trans_decision ep_re 106.97850987044394

{"global_step": 540000, "eval_re": [160.9744437303538, 130.65794017356137, 
116.77066764928716, 158.92187563907547, 23.65080146948169, 214.51180760405816, 
201.7564520056344, 24.725103149596887, 25.680945869326354, 12.135061414064346], 
"eval_len": [101, 91, 74, 88, 30, 118, 125, 28, 45, 18]}

 55%|█████▍    | 549997/1000000 [9:35:50<5:50:24, 21.40it/s]global step 550000, trans_decision ep_re 48.42103853445797

{"global_step": 550000, "eval_re": [46.706737992578574, 196.3035434231848, 
19.159271412465177, 19.542971246789037, 12.825109821611727, 53.699713620337306, 
38.32553970499347, 33.2895005066615, 35.217321799245894, 29.14067581671222], 
"eval_len": [49, 109, 24, 24, 28, 44, 47, 39, 46, 38]}

 56%|█████▌    | 559999/1000000 [9:46:20<5:42:59, 21.38it/s]global step 560000, trans_decision ep_re 50.838398442313

{"global_step": 560000, "eval_re": [76.67736255440869, 173.763067429184, 
21.972240869354394, 26.119864473439325, 14.49517390203192, 43.72242254732206, 
25.445921440045158, 15.023307093072784, 90.88411972238029, 20.2805043918914], 
"eval_len": [57, 91, 30, 34, 17, 46, 25, 30, 71, 27]}

 57%|█████▋    | 569999/1000000 [9:56:40<5:34:31, 21.42it/s]global step 570000, trans_decision ep_re 43.78416568134179

{"global_step": 570000, "eval_re": [20.17577869998102, 12.16110461007241, 
30.108047481281858, 25.35993245840807, 15.528923018099185, 96.356228006303, 
165.78676913915075, 29.173750348969747, 23.39128277505283, 19.79984027609904], 
"eval_len": [23, 28, 43, 27, 19, 65, 84, 44, 28, 20]}

 58%|█████▊    | 579999/1000000 [10:07:30<5:27:45, 21.36it/s]global step 580000, trans_decision ep_re 55.809956867393666

{"global_step": 580000, "eval_re": [134.41354955126766, 14.83025830129722, 
28.931904888640705, 136.66898488609607, 24.639008007538738, 16.52316510454119, 
14.83574593250765, 14.578178388704977, 12.170659019773895, 160.50811459356856], 
"eval_len": [86, 20, 38, 99, 23, 22, 16, 18, 17, 102]}

 59%|█████▉    | 589998/1000000 [10:18:00<5:14:44, 21.71it/s]global step 590000, trans_decision ep_re 58.77925882116396

{"global_step": 590000, "eval_re": [80.3173354496421, 19.221995995091156, 
21.081650256264144, 14.813844974802356, 23.21761304751464, 14.39630028035108, 
84.63416812528874, 113.18558343040755, 195.52585660469686, 21.39824004758096], 
"eval_len": [84, 23, 30, 17, 24, 24, 56, 94, 127, 26]}

 60%|█████▉    | 599998/1000000 [10:28:30<5:06:03, 21.78it/s]global step 600000, trans_decision ep_re 93.72179451385018

{"global_step": 600000, "eval_re": [123.05717063562993, 379.857261910118, 
114.48408787031678, 42.15842537178885, 23.529140263495766, 30.784511683214653, 
28.514873791861284, 134.18106643622187, 25.67881505130665, 34.97259212454815], 
"eval_len": [81, 174, 73, 47, 27, 41, 34, 96, 26, 40]}

 61%|██████    | 609999/1000000 [10:39:00<5:02:53, 21.46it/s]global step 610000, trans_decision ep_re 57.11590252678261

{"global_step": 610000, "eval_re": [106.38931323082116, 98.33762242219836, 
65.33056171771615, 35.053449530948214, 35.23591738086023, 20.512242561862095, 
29.98379225463556, 38.96214276643617, 30.615230593071807, 110.73875280927633], 
"eval_len": [65, 66, 57, 43, 38, 21, 31, 44, 41, 80]}

 62%|██████▏   | 619999/1000000 [10:49:21<4:54:25, 21.51it/s]global step 620000, trans_decision ep_re 45.63671886108433

{"global_step": 620000, "eval_re": [159.02137708163522, 28.362192638103817, 
17.39324847992086, 22.11874797943198, 25.370730156166058, 95.32559072207373, 
23.154603042325224, 21.51937179107086, 41.22808142338651, 22.873245296728996], 
"eval_len": [87, 29, 22, 24, 23, 63, 31, 27, 52, 25]}

 63%|██████▎   | 629999/1000000 [11:00:10<4:47:28, 21.45it/s]global step 630000, trans_decision ep_re 77.74094390480823

{"global_step": 630000, "eval_re": [112.58827026908543, 28.379754627572407, 
95.61222299661823, 23.64562835181962, 160.06621651415676, 112.90180566853435, 
20.006281257039575, 17.535163471460287, 40.89208646426423, 165.78200942753125], 
"eval_len": [79, 34, 63, 30, 103, 76, 35, 19, 47, 95]}

 64%|██████▍   | 639998/1000000 [11:10:40<4:37:43, 21.60it/s]global step 640000, trans_decision ep_re 64.29903146481344

{"global_step": 640000, "eval_re": [18.95469170901327, 45.617398523176306, 
14.64002376644922, 29.203119192747568, 18.16310252976799, 108.54760582497555, 
33.820941796338325, 201.71219903242562, 31.83846394423191, 140.4927683290086], 
"eval_len": [21, 47, 18, 30, 19, 82, 39, 115, 31, 89]}

 65%|██████▍   | 649999/1000000 [11:21:10<4:31:15, 21.50it/s]global step 650000, trans_decision ep_re 110.70028670453318

{"global_step": 650000, "eval_re": [194.04320812060942, 52.92030657761177, 
266.5937119943254, 68.32512361364294, 144.02998973168292, 72.81515829636656, 
19.342398622173796, 122.77237594595442, 149.12197704887956, 17.038617094084987],
"eval_len": [108, 46, 163, 61, 93, 66, 24, 91, 96, 30]}

 66%|██████▌   | 659999/1000000 [11:31:32<4:23:44, 21.49it/s]global step 660000, trans_decision ep_re 26.968369678921288

{"global_step": 660000, "eval_re": [38.615558548614445, 16.614006593193466, 
23.005810229296134, 28.653961265728615, 55.834364936828315, 27.31333279472891, 
25.054117863605494, 17.173887640976925, 19.74362008765143, 17.67503682858912], 
"eval_len": [42, 24, 25, 34, 56, 27, 30, 20, 24, 22]}

 67%|██████▋   | 669999/1000000 [11:42:20<4:16:18, 21.46it/s]global step 670000, trans_decision ep_re 119.73623156077437

{"global_step": 670000, "eval_re": [18.786685326486694, 20.246581619153567, 
49.170701102120944, 274.31907437045624, 242.10246533169712, 268.07744592802254, 
176.07700424383145, 42.64438567858602, 25.41581449822434, 80.52215750916476], 
"eval_len": [30, 23, 56, 144, 121, 140, 95, 52, 31, 71]}

 68%|██████▊   | 679998/1000000 [11:52:50<4:05:57, 21.68it/s]global step 680000, trans_decision ep_re 34.00982285748963

{"global_step": 680000, "eval_re": [24.981175143273497, 69.20061230910066, 
36.30276327568766, 15.073237767183324, 25.917693090937068, 51.304240227235766, 
25.58538918585299, 11.936245800208626, 34.17077268558021, 45.62609908983655], 
"eval_len": [34, 83, 32, 17, 30, 52, 32, 19, 45, 44]}

 69%|██████▉   | 689999/1000000 [12:03:20<3:59:01, 21.62it/s]global step 690000, trans_decision ep_re 41.964570880791754

{"global_step": 690000, "eval_re": [19.460760953912718, 28.659436015377725, 
17.712630325980022, 27.543813638879357, 42.05592682184051, 24.89179488068302, 
39.73689550758985, 179.08927674950328, 18.19087249794944, 22.30430141620159], 
"eval_len": [24, 30, 27, 32, 48, 33, 48, 110, 27, 25]}

 70%|██████▉   | 699999/1000000 [12:13:50<3:53:16, 21.43it/s]global step 700000, trans_decision ep_re 138.13071533141067

{"global_step": 700000, "eval_re": [353.7549880759442, 62.84354840964973, 
351.91401862973487, 253.18934855965614, 165.26200776481167, 24.17628768689269, 
22.292550101348265, 24.066597130033323, 95.91850945906418, 27.88929749697174], 
"eval_len": [160, 57, 151, 129, 94, 27, 25, 26, 69, 33]}

 71%|███████   | 709999/1000000 [12:24:12<3:45:01, 21.48it/s]global step 710000, trans_decision ep_re 30.295559531802006

{"global_step": 710000, "eval_re": [22.281921475867122, 51.708231357344715, 
34.06552417678299, 14.833580227001073, 20.286589216901053, 32.486045005231084, 
52.489748285086726, 20.606711817572222, 25.55493098543939, 28.64231277079368], 
"eval_len": [26, 50, 46, 18, 25, 36, 50, 24, 30, 30]}

 72%|███████▏  | 719999/1000000 [12:35:00<3:37:56, 21.41it/s]global step 720000, trans_decision ep_re 53.25138010296585

{"global_step": 720000, "eval_re": [22.468094572707923, 19.637836533868693, 
232.12012697974225, 23.35366016190663, 26.11637245467391, 18.127356953998312, 
33.24057812639058, 108.31227102225029, 23.050245876581055, 26.087258347538803], 
"eval_len": [29, 22, 124, 26, 30, 21, 46, 92, 26, 28]}

 73%|███████▎  | 729998/1000000 [12:45:30<3:27:33, 21.68it/s]global step 730000, trans_decision ep_re 48.42406259791435

{"global_step": 730000, "eval_re": [51.33802376299167, 175.30522284691443, 
65.92558366891247, 26.58588957420914, 18.47987139723193, 46.8487378259035, 
19.355181802365117, 32.60081888346144, 25.67931885255649, 22.121977364597278], 
"eval_len": [52, 100, 62, 32, 27, 49, 20, 43, 24, 29]}

 74%|███████▍  | 739998/1000000 [12:56:00<3:20:22, 21.63it/s]global step 740000, trans_decision ep_re 55.585722708246024

{"global_step": 740000, "eval_re": [43.99425853677442, 24.757867952531285, 
24.59191909611166, 39.4285662564576, 19.627204762134898, 20.49438159967564, 
204.46304246596418, 27.32324948893146, 23.336439279428397, 127.84029764445059], 
"eval_len": [40, 30, 32, 37, 20, 21, 113, 39, 30, 78]}

 75%|███████▍  | 749999/1000000 [13:06:30<3:13:56, 21.48it/s]global step 750000, trans_decision ep_re 70.90092594347288

{"global_step": 750000, "eval_re": [153.75188894503168, 21.524702140052163, 
159.0179385921789, 55.96243604178528, 36.12847830185237, 151.54495145220068, 
16.12271314190034, 31.95432199983056, 19.711097228674376, 63.290731591222595], 
"eval_len": [90, 25, 89, 59, 48, 93, 23, 33, 27, 59]}

 76%|███████▌  | 759999/1000000 [13:17:00<3:05:17, 21.59it/s]global step 760000, trans_decision ep_re 48.46902379796394

{"global_step": 760000, "eval_re": [20.081302603981744, 80.70128104819038, 
15.610946113921882, 21.949637659691337, 42.23785661433952, 123.60336454445523, 
92.24180829326824, 24.79524901775864, 48.572667610679176, 14.896124473353279], 
"eval_len": [29, 87, 27, 29, 46, 94, 69, 33, 49, 20]}

 77%|███████▋  | 769999/1000000 [13:27:22<2:58:34, 21.47it/s]global step 770000, trans_decision ep_re 35.4002130827502

{"global_step": 770000, "eval_re": [76.06400526993245, 24.18513803099099, 
44.983253783359416, 29.42607352730773, 20.829054449176535, 27.190476521186707, 
16.756620820204727, 24.449990820704617, 50.60069913369426, 39.516818470944465], 
"eval_len": [58, 26, 50, 26, 24, 32, 20, 24, 46, 49]}

 78%|███████▊  | 779998/1000000 [13:38:10<2:48:34, 21.75it/s]global step 780000, trans_decision ep_re 59.474585521844915

{"global_step": 780000, "eval_re": [16.404061476961218, 150.81416233588254, 
22.80257742409428, 36.14901662566608, 148.91498772750995, 56.00586252632598, 
30.827709985290014, 69.27860582819635, 21.167635869295005, 42.3812354192277], 
"eval_len": [25, 84, 24, 45, 97, 57, 42, 57, 21, 48]}

 79%|███████▉  | 789998/1000000 [13:48:40<2:41:12, 21.71it/s]global step 790000, trans_decision ep_re 53.66922149439824

{"global_step": 790000, "eval_re": [26.956797741768337, 102.43005212312904, 
50.49473937216734, 19.329638210684767, 16.527500468216154, 24.976279742817603, 
28.558339752195657, 176.57138942967714, 68.19354067932123, 22.653937424005232], 
"eval_len": [32, 82, 60, 25, 25, 30, 27, 109, 62, 23]}

 80%|███████▉  | 799999/1000000 [13:59:10<2:35:28, 21.44it/s]global step 800000, trans_decision ep_re 99.25836579405716

{"global_step": 800000, "eval_re": [21.555156645388227, 114.29750010392392, 
179.10524367120496, 169.61051763011494, 135.10556353504896, 20.665883788082226, 
31.026921907365512, 104.52498726742144, 198.00881749089575, 18.68306590112568], 
"eval_len": [22, 78, 125, 105, 104, 25, 34, 85, 119, 30]}

 81%|████████  | 809999/1000000 [14:09:40<2:26:15, 21.65it/s]global step 810000, trans_decision ep_re 78.16273780946025

{"global_step": 810000, "eval_re": [75.88632681583366, 212.05125662884154, 
86.57601713538605, 25.193512995153295, 21.110349462956812, 94.19089924927455, 
109.72318005423695, 111.98754022146188, 17.283552862293234, 27.62474266916448], 
"eval_len": [51, 124, 71, 30, 22, 72, 76, 67, 28, 34]}

 82%|████████▏ | 819999/1000000 [14:20:10<2:19:50, 21.45it/s]global step 820000, trans_decision ep_re 42.5324261704878

{"global_step": 820000, "eval_re": [21.73733266958879, 18.108849031627717, 
44.172426813554836, 104.47619132387254, 28.487152483272485, 29.177225817465445, 
24.83232409533355, 29.972161887954016, 100.22030628915049, 24.14029129305822], 
"eval_len": [29, 25, 46, 83, 28, 33, 29, 32, 69, 29]}

 83%|████████▎ | 829999/1000000 [14:30:40<2:11:18, 21.58it/s]global step 830000, trans_decision ep_re 52.68931107181877

{"global_step": 830000, "eval_re": [75.30661625097495, 50.844696293624565, 
29.113979401490624, 32.95998766467117, 27.634065510753302, 24.719605280641105, 
205.82953428842907, 40.175135410117754, 19.179792318956444, 21.129698298528705],
"eval_len": [63, 50, 28, 40, 31, 28, 111, 44, 22, 26]}

 84%|████████▍ | 839999/1000000 [14:41:02<2:04:05, 21.49it/s]global step 840000, trans_decision ep_re 55.689583102000675

{"global_step": 840000, "eval_re": [25.76759024653962, 108.50308300718686, 
28.64685321433499, 113.80173482012819, 26.05861746358344, 137.30172805331895, 
51.876997605803076, 20.6139366868534, 26.153599599031637, 18.171690323226752], 
"eval_len": [28, 69, 33, 78, 28, 134, 45, 25, 33, 21]}

 85%|████████▍ | 849998/1000000 [14:51:51<1:55:22, 21.67it/s]global step 850000, trans_decision ep_re 71.47507308494501

{"global_step": 850000, "eval_re": [24.725305117956974, 79.51390848916608, 
35.874699028880165, 80.48461834732342, 43.18602509832971, 21.49894336624696, 
117.68378929628085, 17.338287723119816, 117.64577317472366, 176.79938120742247],
"eval_len": [29, 63, 37, 53, 53, 25, 82, 23, 87, 121]}

 86%|████████▌ | 859999/1000000 [15:02:21<1:48:36, 21.48it/s]global step 860000, trans_decision ep_re 56.59216922562398

{"global_step": 860000, "eval_re": [88.18355754866124, 193.06383741169472, 
19.01429827418449, 20.68118449657937, 24.423673480340735, 23.127449310304232, 
20.679120546129486, 45.676352900247586, 107.32910313305007, 23.74311515504786], 
"eval_len": [90, 125, 19, 30, 31, 22, 27, 44, 72, 30]}

 87%|████████▋ | 869999/1000000 [15:12:51<1:40:50, 21.49it/s]global step 870000, trans_decision ep_re 61.37679032166189

{"global_step": 870000, "eval_re": [23.344204080109638, 25.061010841278407, 
74.71788120802232, 30.443097514874953, 182.98905355804382, 14.470590250536041, 
24.694371212246367, 111.8976360954066, 22.641117186672208, 103.50894126942863], 
"eval_len": [26, 27, 57, 31, 108, 16, 32, 75, 27, 79]}

 88%|████████▊ | 879999/1000000 [15:23:12<1:32:28, 21.63it/s]global step 880000, trans_decision ep_re 97.69929820497616

{"global_step": 880000, "eval_re": [49.034523278104444, 18.87444302084932, 
20.831200832440516, 44.30828927017338, 427.96956758764566, 40.49067267574956, 
37.6605870195303, 231.84899215138506, 83.41530092932918, 22.559405284554128], 
"eval_len": [46, 21, 33, 51, 163, 47, 47, 123, 60, 29]}

 89%|████████▉ | 889998/1000000 [15:34:01<1:24:44, 21.63it/s]global step 890000, trans_decision ep_re 36.85020614009779

{"global_step": 890000, "eval_re": [41.77015501970266, 25.416728764377126, 
29.789002271122737, 39.51717523455985, 21.948556398482097, 35.71292555016416, 
62.41715224082819, 42.64105949107856, 36.813549424896095, 32.47575700576641], 
"eval_len": [42, 26, 43, 54, 21, 42, 51, 44, 42, 37]}

 90%|████████▉ | 899998/1000000 [15:44:31<1:16:38, 21.75it/s]global step 900000, trans_decision ep_re 37.062018405145444

{"global_step": 900000, "eval_re": [22.934951438782182, 19.22753034495078, 
110.86051895909462, 33.38393121173758, 31.12699378622451, 15.115399427546098, 
37.51996631137866, 20.055717726760818, 53.5792928544558, 26.8158819905234], 
"eval_len": [24, 21, 82, 39, 30, 20, 36, 28, 49, 32]}

 91%|█████████ | 909998/1000000 [15:55:01<1:08:58, 21.75it/s]global step 910000, trans_decision ep_re 98.11021998030076

{"global_step": 910000, "eval_re": [153.2279419276056, 26.31738950259177, 
102.17019827608745, 19.694019543651372, 207.58314898245828, 72.88554877908227, 
16.855472514683278, 135.4980197218553, 31.75958866347731, 215.110871891515], 
"eval_len": [92, 30, 81, 23, 104, 71, 24, 86, 37, 117]}

 92%|█████████▏| 919999/1000000 [16:05:31<1:02:03, 21.48it/s]global step 920000, trans_decision ep_re 29.764291103180945

{"global_step": 920000, "eval_re": [27.76328351961971, 10.092151640585053, 
42.78040057439603, 37.128918354171255, 43.68220136705768, 25.732684251725487, 
18.97009626410523, 33.63203271221471, 28.833859215631634, 29.027283132302646], 
"eval_len": [40, 14, 46, 39, 47, 26, 28, 37, 31, 41]}

 93%|█████████▎| 929999/1000000 [16:16:01<54:11, 21.53it/s]global step 930000, trans_decision ep_re 29.72970810597697

{"global_step": 930000, "eval_re": [31.91176602623781, 35.37987619840739, 
27.253608556087443, 29.145327694943177, 17.312058162906336, 21.36103712904738, 
25.96432324329238, 33.69277695958422, 30.1588432537141, 45.11746383554948], 
"eval_len": [33, 43, 29, 35, 31, 43, 32, 42, 34, 43]}

 94%|█████████▍| 939999/1000000 [16:26:31<46:34, 21.47it/s]global step 940000, trans_decision ep_re 86.52553576888069

{"global_step": 940000, "eval_re": [186.07742902916908, 107.75398321705599, 
28.921731881542048, 204.50992770012587, 163.71029752254776, 62.14970160457767, 
28.039633177074347, 25.422243345423478, 19.90596768311005, 38.76444252818052], 
"eval_len": [90, 86, 39, 112, 98, 55, 32, 30, 24, 47]}

 95%|█████████▍| 949999/1000000 [16:36:51<38:34, 21.60it/s]global step 950000, trans_decision ep_re 28.845188167908418

{"global_step": 950000, "eval_re": [30.67897904383292, 20.729225537987812, 
28.06835600870883, 23.876275239824185, 26.842559507476835, 31.282279565811887, 
48.76331570447622, 30.16643037385919, 22.153954863060186, 25.890505834046085], 
"eval_len": [37, 26, 28, 32, 50, 53, 60, 52, 49, 50]}

 96%|█████████▌| 959999/1000000 [16:47:41<31:02, 21.47it/s]global step 960000, trans_decision ep_re 120.70811469529403

{"global_step": 960000, "eval_re": [194.9348544504266, 65.4331119184439, 
227.57945409678948, 162.29959353806132, 165.58120292823685, 258.61425697454473, 
53.3276895971677, 15.065302191730352, 45.12582804529683, 19.119853212242596], 
"eval_len": [224, 95, 155, 154, 152, 185, 81, 20, 75, 22]}

 97%|█████████▋| 969998/1000000 [16:58:11<22:58, 21.76it/s]global step 970000, trans_decision ep_re 50.31998886510119

{"global_step": 970000, "eval_re": [29.358482604607946, 38.668582678788894, 
32.91495540382744, 64.5176931342868, 28.44128195804808, 162.89509482488268, 
60.934662698259814, 24.536728328338853, 42.19643999016024, 18.735967029811174], 
"eval_len": [32, 39, 47, 60, 39, 99, 60, 28, 42, 24]}

 98%|█████████▊| 979998/1000000 [17:08:41<15:25, 21.61it/s]global step 980000, trans_decision ep_re 30.94800064581765

{"global_step": 980000, "eval_re": [30.25681512901583, 32.705269559122605, 
39.218007149510626, 17.310344315070136, 45.09861709252601, 37.02083805642506, 
27.866312193761107, 22.220250884087367, 28.851817610159593, 28.931734468498174],
"eval_len": [30, 35, 45, 20, 46, 42, 29, 42, 32, 32]}

 99%|█████████▉| 989999/1000000 [17:19:11<07:43, 21.59it/s]global step 990000, trans_decision ep_re 53.633319957334336

{"global_step": 990000, "eval_re": [20.7781725610238, 21.58001139281884, 
24.97405597141474, 30.018133414731746, 15.973006933000045, 60.35849497432259, 
17.503945070389864, 19.121449094115963, 23.59609306967137, 302.4298370918544], 
"eval_len": [26, 32, 30, 32, 19, 51, 29, 25, 28, 129]}

100%|█████████▉| 999999/1000000 [17:29:41<00:00, 21.42it/s]global step 1000000, trans_decision ep_re 38.198839393198334

{"global_step": 1000000, "eval_re": [19.911634247053605, 32.222268172252846, 
28.866892156971414, 54.09165757898764, 53.031586128362804, 31.354765863025364, 
37.174727772336304, 51.11492571780972, 46.24099749777653, 27.97893879740714], 
"eval_len": [21, 41, 31, 49, 48, 29, 41, 49, 49, 27]}

100%|██████████| 1000000/1000000 [17:29:47<00:00, 15.88it/s]
