
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.05
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [04:30<10:38:41, 25.83it/s]global step 10000, trans_decision ep_re 175.9772816634433

{"global_step": 10000, "eval_re": [243.63038970911853, 196.54227144936186, 
251.64084797894753, 27.049675167729326, 21.49813579936622, 21.18790411969196, 
227.83668405268045, 303.6866983445998, 200.61956684541005, 266.0806431675273], 
"eval_len": [132, 111, 140, 36, 30, 30, 125, 181, 121, 160]}

  2%|▏         | 19999/1000000 [13:10<10:41:02, 25.48it/s]global step 20000, trans_decision ep_re 98.23350013609182

{"global_step": 20000, "eval_re": [21.776689935887433, 19.519406365833724, 
272.22039866614483, 33.47693923655209, 24.68968415941436, 26.11481518953696, 
28.025427754747017, 230.8357580074726, 302.8668034154728, 22.80907862985631], 
"eval_len": [30, 29, 158, 42, 31, 33, 34, 137, 188, 30]}

  3%|▎         | 29997/1000000 [21:33<10:27:23, 25.77it/s]global step 30000, trans_decision ep_re 32.09561130800465

{"global_step": 30000, "eval_re": [20.842698426771932, 13.240101203711763, 
19.52106753494108, 28.45737096032186, 26.30486824467175, 28.34796536361451, 
28.916865185205324, 23.035272240406677, 112.59410555087894, 19.695798369522606],
"eval_len": [30, 22, 30, 40, 38, 37, 35, 31, 97, 31]}

  4%|▍         | 39999/1000000 [30:20<10:25:55, 25.56it/s]global step 40000, trans_decision ep_re 26.573536279194542

{"global_step": 40000, "eval_re": [37.04334795456698, 32.191965668514406, 
26.891051020390698, 23.312213777713886, 28.93570237330999, 21.281704422500553, 
25.484455290746304, 22.372504195073855, 23.653548709157224, 24.568869379971495],
"eval_len": [43, 39, 36, 30, 36, 30, 34, 31, 32, 34]}

  5%|▍         | 49999/1000000 [38:45<10:16:16, 25.69it/s]global step 50000, trans_decision ep_re 23.406894604607494

{"global_step": 50000, "eval_re": [22.5459182246736, 27.502632145494747, 
27.105498092017232, 21.550785229532398, 18.05715040645468, 33.106095206200564, 
30.456527815740166, 17.58675421626463, 12.609530728321845, 23.548053981375073], 
"eval_len": [30, 34, 34, 31, 28, 37, 35, 29, 24, 31]}

  6%|▌         | 59999/1000000 [47:21<10:18:08, 25.34it/s]global step 60000, trans_decision ep_re 51.322944058892574

{"global_step": 60000, "eval_re": [23.298102516376186, 20.779805787966826, 
23.286873166415713, 23.24535315954786, 21.58771685961288, 24.642475284491233, 
25.04270672000846, 28.08262657055993, 15.339600002079031, 307.92418052186764], 
"eval_len": [31, 30, 31, 30, 29, 32, 33, 34, 26, 179]}

  7%|▋         | 69997/1000000 [56:10<10:03:32, 25.68it/s]global step 70000, trans_decision ep_re 22.876925251687343

{"global_step": 70000, "eval_re": [18.785135011666906, 29.317744017120518, 
20.98664273500526, 24.648294075109675, 14.16882487438595, 23.678013705340963, 
17.080939669223426, 17.723220454407436, 18.886414247103893, 43.494023727509386],
"eval_len": [29, 35, 29, 32, 26, 32, 27, 28, 30, 45]}

  8%|▊         | 79997/1000000 [1:04:33<10:00:09, 25.55it/s]global step 80000, trans_decision ep_re 27.02136472223204

{"global_step": 80000, "eval_re": [86.28095988503125, 17.839309292668503, 
16.88786922543176, 18.26277969050415, 22.78880159585743, 25.403592326311685, 
21.270383954958398, 23.026263618880215, 15.508624074114794, 22.945063558562182],
"eval_len": [79, 28, 27, 28, 31, 30, 30, 31, 27, 32]}

  9%|▉         | 89999/1000000 [1:13:10<9:49:35, 25.72it/s]global step 90000, trans_decision ep_re 26.822787304770106

{"global_step": 90000, "eval_re": [24.68273811546378, 22.226877030096865, 
25.077250790130922, 19.86996392049688, 19.323101714727265, 54.43102936276336, 
19.68534513024501, 22.032956550129803, 50.031631938963216, 10.866978494683885], 
"eval_len": [30, 31, 34, 30, 29, 63, 29, 31, 59, 23]}

 10%|▉         | 99999/1000000 [1:21:46<9:45:57, 25.60it/s]global step 100000, trans_decision ep_re 20.769190067261608

{"global_step": 100000, "eval_re": [24.2409901553325, 23.25977174277683, 
16.249813041759527, 18.08408742730911, 19.61202156507169, 24.498621096773434, 
20.453812174462502, 22.96588203919492, 18.014839517517768, 20.31206191241779], 
"eval_len": [32, 31, 27, 27, 28, 31, 29, 30, 29, 32]}

 11%|█         | 109997/1000000 [1:30:24<9:42:04, 25.48it/s]global step 110000, trans_decision ep_re 22.600692316988823

{"global_step": 110000, "eval_re": [18.391154078822982, 18.448114411097624, 
25.052253566463712, 25.27411048991059, 23.129000619326835, 23.063094335386914, 
21.127345651435718, 22.29748965840454, 19.355840427505413, 29.86851993153391], 
"eval_len": [28, 28, 31, 32, 31, 30, 30, 32, 29, 34]}

 12%|█▏        | 119999/1000000 [1:39:01<9:31:05, 25.68it/s]global step 120000, trans_decision ep_re 51.76684704475345

{"global_step": 120000, "eval_re": [323.3608465604848, 20.399840252842814, 
20.60703962578642, 21.154590893101876, 21.497183217784563, 20.113089356927606, 
20.642233306081316, 18.616070727203038, 24.710703113487455, 26.5668733938346], 
"eval_len": [195, 30, 30, 30, 30, 30, 30, 29, 31, 33]}

 13%|█▎        | 129999/1000000 [1:47:50<9:31:51, 25.36it/s]global step 130000, trans_decision ep_re 21.847726572560823

{"global_step": 130000, "eval_re": [25.871119747201185, 22.05882738042774, 
24.056227003044285, 20.06835500253149, 22.380479000350256, 23.19999761909049, 
25.600606194783403, 18.391185702236083, 21.589288928255627, 15.261179147687676],
"eval_len": [33, 30, 32, 28, 32, 32, 33, 29, 29, 27]}

 14%|█▍        | 139998/1000000 [1:56:30<9:09:52, 26.07it/s]global step 140000, trans_decision ep_re 20.556344491911787

{"global_step": 140000, "eval_re": [20.3718121892254, 22.753742783394667, 
20.630884060729887, 20.097811472503015, 21.145790199968765, 19.327634971046837, 
22.337122169807593, 13.56301051888286, 20.76805530344393, 24.56758125011491], 
"eval_len": [29, 31, 29, 32, 30, 30, 29, 29, 30, 32]}

 15%|█▍        | 149999/1000000 [2:04:55<9:13:20, 25.60it/s]global step 150000, trans_decision ep_re 20.986340828692

{"global_step": 150000, "eval_re": [17.627774244615694, 19.84038889678622, 
21.837299177910698, 19.4855566749541, 24.953317792989953, 22.036100841687915, 
19.510234957005675, 23.893345631459862, 21.919712772694808, 18.759677296815035],
"eval_len": [28, 31, 32, 29, 33, 30, 29, 30, 30, 28]}

 16%|█▌        | 159997/1000000 [2:13:33<9:06:51, 25.60it/s]global step 160000, trans_decision ep_re 97.07176846587176

{"global_step": 160000, "eval_re": [23.513971312323164, 570.582302068364, 
20.430891596910048, 230.9386013979146, 22.66819108745023, 22.285144103291916, 
20.211670858873376, 20.56711280141583, 15.198873896717066, 24.32092553545731], 
"eval_len": [31, 434, 29, 151, 30, 30, 30, 29, 25, 31]}

 17%|█▋        | 169998/1000000 [2:22:13<8:55:09, 25.85it/s]global step 170000, trans_decision ep_re 22.84060321692301

{"global_step": 170000, "eval_re": [19.92716680954775, 22.533094646369836, 
19.068206628814497, 26.530959702692545, 23.51247962741183, 23.218852636531132, 
20.89835670908448, 19.30643251540156, 25.295665656386028, 28.11481723699046], 
"eval_len": [28, 30, 29, 33, 30, 31, 29, 29, 31, 34]}

 18%|█▊        | 179999/1000000 [2:31:00<9:02:34, 25.19it/s]global step 180000, trans_decision ep_re 20.09340153077803

{"global_step": 180000, "eval_re": [20.310159124242432, 15.428449936837062, 
16.677551807938848, 20.77846550865549, 20.68864321580387, 23.683446779393933, 
17.03902440007819, 20.379346642757103, 22.620346726687053, 23.328581165386314], 
"eval_len": [30, 26, 27, 31, 29, 31, 27, 30, 30, 31]}

 19%|█▉        | 189997/1000000 [2:39:23<8:46:24, 25.65it/s]global step 190000, trans_decision ep_re 33.35320506540786

{"global_step": 190000, "eval_re": [15.925754048765938, 21.53678475743986, 
19.360457705939922, 17.5310928621928, 19.278193406121886, 160.38430227042755, 
24.486874315757266, 23.816811982357784, 8.789230516437064, 22.422548788638597], 
"eval_len": [27, 30, 29, 29, 29, 117, 32, 30, 18, 30]}

 20%|█▉        | 199999/1000000 [2:48:10<8:47:27, 25.28it/s]global step 200000, trans_decision ep_re 20.56751433709438

{"global_step": 200000, "eval_re": [24.052051550687548, 15.13985898063015, 
23.141761539836345, 22.766531868460454, 21.729437792173783, 20.25273923125239, 
20.61783216698, 22.235648021621586, 17.925703623886605, 17.813578595414963], 
"eval_len": [31, 27, 32, 31, 29, 30, 30, 32, 28, 27]}

 21%|██        | 209999/1000000 [2:56:35<8:33:24, 25.65it/s]global step 210000, trans_decision ep_re 23.139333278444788

{"global_step": 210000, "eval_re": [22.576893753690378, 22.40992329121811, 
11.186851699062636, 23.54657867310984, 54.55563233539376, 14.732050047993146, 
23.718776250701683, 17.570370025297713, 15.466971346061456, 25.62928536191913], 
"eval_len": [32, 34, 21, 32, 61, 26, 32, 29, 26, 32]}

 22%|██▏       | 219998/1000000 [3:05:11<8:24:51, 25.75it/s]global step 220000, trans_decision ep_re 27.61808759078095

{"global_step": 220000, "eval_re": [20.505147175134496, 66.39894247545784, 
24.60840578869644, 23.466978491703657, 18.571200806236103, 26.175672790166416, 
25.514389957058217, 27.068108873408246, 20.964713247848938, 22.90731630209908], 
"eval_len": [30, 65, 31, 31, 29, 33, 31, 32, 29, 32]}

 23%|██▎       | 229997/1000000 [3:14:00<8:19:12, 25.71it/s]global step 230000, trans_decision ep_re 59.418333090300756

{"global_step": 230000, "eval_re": [115.94477488052131, 21.46010729182869, 
149.5527043767072, 19.007542915999025, 22.61182629466722, 15.633546966749918, 
194.66746843634917, 18.533807192274274, 14.43415749750968, 22.337395050401195], 
"eval_len": [89, 30, 92, 30, 30, 28, 103, 28, 27, 31]}

 24%|██▍       | 239999/1000000 [3:22:25<8:17:24, 25.46it/s]global step 240000, trans_decision ep_re 28.688463406702517

{"global_step": 240000, "eval_re": [18.78381272741954, 20.49993945039868, 
20.021469121248956, 20.340084118927248, 25.776534210993045, 22.373058500019503, 
17.52487502784826, 98.07812618324486, 23.13116165563847, 20.355573071286578], 
"eval_len": [29, 29, 32, 29, 39, 31, 26, 87, 33, 31]}

 25%|██▍       | 249999/1000000 [3:31:01<8:09:02, 25.56it/s]global step 250000, trans_decision ep_re 21.49617874514717

{"global_step": 250000, "eval_re": [20.687366830585088, 26.460317353810993, 
25.00593586778878, 21.380044234375422, 21.751434625689953, 21.215695622380075, 
15.122374176194313, 22.18923279948692, 25.259382862866428, 15.89000307829376], 
"eval_len": [29, 33, 33, 30, 29, 29, 28, 32, 32, 27]}

 26%|██▌       | 259998/1000000 [3:39:50<7:58:16, 25.79it/s]global step 260000, trans_decision ep_re 40.207543482577265

{"global_step": 260000, "eval_re": [24.333268023172455, 20.983748619549925, 
19.316120801367507, 18.82657221554083, 18.110378911701243, 218.02400694675677, 
19.509983473826043, 21.92877741151811, 18.821474890514686, 22.221103531825054], 
"eval_len": [31, 30, 28, 28, 29, 119, 29, 30, 28, 32]}

 27%|██▋       | 269997/1000000 [3:48:14<8:01:12, 25.28it/s]global step 270000, trans_decision ep_re 24.426889678006226

{"global_step": 270000, "eval_re": [23.444037466166687, 23.157257312336462, 
24.845856265724283, 22.990435970817803, 17.022101253173517, 19.003687479383764, 
21.624639653184715, 22.43353089082384, 20.427355653081133, 49.31999483537009], 
"eval_len": [31, 31, 33, 30, 27, 28, 30, 31, 29, 62]}

 28%|██▊       | 279999/1000000 [3:56:50<7:43:56, 25.87it/s]global step 280000, trans_decision ep_re 119.66781631897054

{"global_step": 280000, "eval_re": [19.99475590690792, 24.96428751491138, 
21.045729792961325, 103.56580404547806, 23.678478059496573, 17.200411089311494, 
25.298551664306277, 21.59399179947616, 913.3856837866641, 25.950469530192088], 
"eval_len": [30, 31, 31, 93, 31, 30, 32, 30, 358, 32]}

 29%|██▉       | 289998/1000000 [4:05:25<7:34:31, 26.03it/s]global step 290000, trans_decision ep_re 30.438216346587904

{"global_step": 290000, "eval_re": [21.143141588984243, 23.793375296388945, 
22.02270326968842, 18.680592319652003, 21.76817388068355, 20.021874247796504, 
23.409215382295866, 26.48333996278116, 107.24696350993048, 19.812784007677852], 
"eval_len": [30, 30, 32, 27, 30, 29, 31, 33, 90, 28]}

 30%|██▉       | 299999/1000000 [4:14:00<7:33:52, 25.70it/s]global step 300000, trans_decision ep_re 59.72633505090549

{"global_step": 300000, "eval_re": [12.693609063359423, 24.644340259484878, 
20.395047249881493, 274.0371122544903, 23.762773248278133, 25.114649425432923, 
21.645927433533753, 149.92260613103224, 21.749735310932365, 23.297550132629492],
"eval_len": [25, 33, 30, 143, 32, 32, 30, 92, 29, 30]}

 31%|███       | 309999/1000000 [4:22:35<7:25:44, 25.80it/s]global step 310000, trans_decision ep_re 73.48951207931275

{"global_step": 310000, "eval_re": [448.8678240874848, 21.23675685444194, 
15.23358093339421, 23.66195636445115, 22.225793377320716, 21.148995718024995, 
120.3711768220355, 19.632389163570743, 19.135826482965488, 23.380820989438007], 
"eval_len": [180, 30, 26, 31, 32, 32, 96, 29, 29, 32]}

 32%|███▏      | 319999/1000000 [4:31:20<7:22:30, 25.61it/s]global step 320000, trans_decision ep_re 95.16780081900427

{"global_step": 320000, "eval_re": [24.241537499076202, 23.428449110016885, 
19.93716795197059, 659.2415285689381, 23.918671235318453, 16.349237851400044, 
21.18994561228004, 79.55293333090314, 21.629019074199974, 62.189517955939245], 
"eval_len": [31, 30, 30, 266, 30, 29, 29, 81, 29, 65]}

 33%|███▎      | 329999/1000000 [4:39:45<7:14:19, 25.71it/s]global step 330000, trans_decision ep_re 67.25598662910653

{"global_step": 330000, "eval_re": [24.147055404965812, 197.53317630730626, 
22.628403630205188, 21.311340008805274, 21.343051604286355, 17.45071951925484, 
23.298009710010408, 19.071415838644732, 142.01870805486553, 183.75798621272088],
"eval_len": [31, 125, 31, 31, 30, 27, 30, 29, 100, 117]}

 34%|███▍      | 339999/1000000 [4:48:20<7:11:26, 25.50it/s]global step 340000, trans_decision ep_re 38.69721903911782

{"global_step": 340000, "eval_re": [83.85427227971748, 12.873973266277073, 
91.93981582686034, 23.006370578087704, 21.24627768005839, 70.13323468093887, 
23.65913060775033, 20.874044705806863, 17.60647768673096, 21.77859307895017], 
"eval_len": [76, 23, 81, 32, 30, 73, 32, 31, 28, 32]}

 35%|███▍      | 349999/1000000 [4:56:55<7:14:20, 24.94it/s]global step 350000, trans_decision ep_re 38.28683259884015

{"global_step": 350000, "eval_re": [21.14641362697214, 21.21570402952914, 
21.066012484256866, 20.537580644658426, 22.129692391772952, 23.037846835540815, 
18.974969956184236, 20.93769803449989, 22.60686139106522, 191.21554659392174], 
"eval_len": [31, 32, 30, 30, 30, 31, 28, 31, 31, 116]}

 36%|███▌      | 359999/1000000 [5:05:30<6:58:20, 25.50it/s]global step 360000, trans_decision ep_re 30.608928139158564

{"global_step": 360000, "eval_re": [92.77549234580152, 18.679055233533457, 
18.747531184880323, 20.848375047584703, 20.92469078724245, 57.561577380321616, 
17.870490721404177, 23.937368941339322, 15.06999535711459, 19.674704392363463], 
"eval_len": [82, 28, 30, 31, 30, 64, 29, 32, 27, 29]}

 37%|███▋      | 369999/1000000 [5:14:05<6:51:58, 25.49it/s]global step 370000, trans_decision ep_re 91.11786190281586

{"global_step": 370000, "eval_re": [313.8649998671221, 22.650175153334864, 
190.2386839029047, 15.118822968438689, 15.12308979853031, 118.1837088599827, 
21.04856180355035, 170.36122193154605, 22.57440755578394, 22.01494718696498], 
"eval_len": [170, 30, 121, 26, 30, 101, 32, 111, 31, 31]}

 38%|███▊      | 379999/1000000 [5:22:41<6:44:16, 25.56it/s]global step 380000, trans_decision ep_re 124.08102539249842

{"global_step": 380000, "eval_re": [79.09323472187434, 134.42535562508172, 
17.824037369642387, 338.03935557255255, 343.88849592335646, 197.88414323128106, 
79.45836430380928, 17.111248373501873, 14.67370223689295, 18.412316566991382], 
"eval_len": [79, 96, 28, 166, 161, 121, 92, 27, 26, 29]}

 39%|███▉      | 389997/1000000 [5:31:30<6:36:37, 25.63it/s]global step 390000, trans_decision ep_re 126.55328714355551

{"global_step": 390000, "eval_re": [20.20812054810253, 11.896864527127818, 
128.32880003427485, 17.79489634483057, 148.8415097776105, 361.86176545491423, 
19.45034450123892, 270.10820465526587, 15.94851157817309, 271.0938540140168], 
"eval_len": [28, 25, 111, 28, 112, 204, 30, 149, 27, 149]}

 40%|███▉      | 399997/1000000 [5:39:53<6:29:49, 25.65it/s]global step 400000, trans_decision ep_re 130.11818637642597

{"global_step": 400000, "eval_re": [134.10227102385616, 365.603230638637, 
163.78713665494092, 20.274366127678746, 14.816538146346952, 17.86692613306911, 
17.076728681694444, 22.14231022045542, 530.0613293097566, 15.451026827824156], 
"eval_len": [105, 172, 110, 31, 29, 28, 29, 31, 217, 29]}

 41%|████      | 409999/1000000 [5:48:40<6:21:57, 25.74it/s]global step 410000, trans_decision ep_re 94.70248672338764

{"global_step": 410000, "eval_re": [26.37538306402342, 267.4743270121909, 
120.95939250210667, 24.70549889385952, 214.34247674054834, 17.860409021390804, 
16.518197084140898, 19.149127244884152, 219.70750563801346, 19.932550032718325],
"eval_len": [32, 145, 95, 31, 128, 28, 26, 27, 122, 29]}

 42%|████▏     | 419999/1000000 [5:57:20<6:16:30, 25.68it/s]global step 420000, trans_decision ep_re 229.49803355010258

{"global_step": 420000, "eval_re": [161.20068130447032, 20.84415168615374, 
18.83815517920827, 384.2509306311293, 20.432613722753153, 1031.3435190523526, 
18.265096618973818, 598.4045702000212, 22.982529576595756, 18.418087529367877], 
"eval_len": [100, 31, 29, 183, 29, 335, 28, 232, 31, 29]}

 43%|████▎     | 429998/1000000 [6:05:42<6:06:55, 25.89it/s]global step 430000, trans_decision ep_re 264.8600659450202

{"global_step": 430000, "eval_re": [21.892130357260772, 21.02303983350514, 
556.0031015951346, 21.143671643357205, 619.7997947811091, 104.1107710177447, 
437.011367863661, 106.68065536376085, 739.6568697101578, 21.279257284510678], 
"eval_len": [29, 29, 222, 29, 230, 87, 207, 81, 277, 30]}

 44%|████▍     | 439999/1000000 [6:14:20<6:06:00, 25.50it/s]global step 440000, trans_decision ep_re 173.7739215742618

{"global_step": 440000, "eval_re": [124.62872731396945, 99.84838070023184, 
108.03673714257717, 21.49066350550102, 624.325100593786, 93.4840425213464, 
23.37897080503374, 598.5236520920748, 17.918918846669268, 26.104022221428558], 
"eval_len": [105, 98, 106, 29, 252, 83, 31, 212, 31, 32]}

 45%|████▍     | 449997/1000000 [6:23:10<5:58:30, 25.57it/s]global step 450000, trans_decision ep_re 24.885551358254975

{"global_step": 450000, "eval_re": [19.54464359678102, 15.602379122606328, 
20.048413082641055, 20.69105626763269, 23.248865122076804, 17.536059634415842, 
19.071427035425767, 71.78264236354966, 19.95613734398846, 21.373890013432113], 
"eval_len": [29, 26, 29, 30, 31, 27, 28, 95, 29, 30]}

 46%|████▌     | 459998/1000000 [6:31:33<5:47:40, 25.89it/s]global step 460000, trans_decision ep_re 36.81235134969063

{"global_step": 460000, "eval_re": [21.201394770989, 22.018723622680188, 
16.125327040583247, 18.79246226719333, 24.569898087571627, 19.57772134430513, 
18.546213218957792, 23.57442085125551, 185.1003026879252, 18.617049605445303], 
"eval_len": [30, 31, 29, 28, 31, 28, 29, 32, 125, 28]}

 47%|████▋     | 469997/1000000 [6:40:20<5:46:08, 25.52it/s]global step 470000, trans_decision ep_re 115.01626197499563

{"global_step": 470000, "eval_re": [19.111382729981756, 24.257663684060923, 
517.9974558595778, 20.377339495348767, 462.7084248798406, 23.74947141639664, 
22.72226992624989, 18.206540154739212, 22.956495013368983, 18.075576590391677], 
"eval_len": [28, 32, 206, 28, 169, 32, 30, 27, 31, 29]}

 48%|████▊     | 479997/1000000 [6:48:44<5:36:19, 25.77it/s]global step 480000, trans_decision ep_re 67.68180403871233

{"global_step": 480000, "eval_re": [18.843833172253607, 15.20732014018584, 
198.40091734772744, 14.802191375045727, 143.8047598670456, 203.7243329819206, 
19.744430776718094, 23.559148448040016, 17.755263085941106, 20.975843192245225],
"eval_len": [28, 30, 130, 27, 115, 130, 28, 31, 27, 30]}

 49%|████▉     | 489999/1000000 [6:57:30<5:33:21, 25.50it/s]global step 490000, trans_decision ep_re 262.2496361172307

{"global_step": 490000, "eval_re": [22.20551122869477, 1419.9219577782928, 
19.265191466166343, 22.14122973297141, 22.251897056310497, 20.694467339172984, 
16.342826786435204, 22.024488893130577, 1037.3691738440043, 20.279617047128127],
"eval_len": [30, 456, 30, 31, 30, 29, 28, 31, 342, 29]}

 50%|████▉     | 499998/1000000 [7:06:10<5:22:17, 25.86it/s]global step 500000, trans_decision ep_re 93.54284162091079

{"global_step": 500000, "eval_re": [15.890022777597935, 19.932135205545798, 
252.7533320514885, 16.211776025497297, 533.0179722222374, 15.688883304651382, 
17.995486653358792, 19.64230675475574, 23.53097858546504, 20.76552262851011], 
"eval_len": [28, 29, 133, 27, 256, 27, 27, 29, 31, 30]}

 51%|█████     | 509997/1000000 [7:14:33<5:19:46, 25.54it/s]global step 510000, trans_decision ep_re 69.68081013576968

{"global_step": 510000, "eval_re": [23.515693531105526, 209.07263343683562, 
22.84424838300593, 21.887353127656063, 20.479615975858653, 20.168698831167884, 
315.12436278955477, 23.990936373944592, 19.988866077261953, 19.73569283130574], 
"eval_len": [31, 131, 30, 31, 30, 28, 163, 30, 29, 30]}

 52%|█████▏    | 519999/1000000 [7:23:20<5:13:18, 25.53it/s]global step 520000, trans_decision ep_re 73.12568953674074

{"global_step": 520000, "eval_re": [20.567886411699057, 182.08323312921223, 
23.400636403876216, 26.602687958101487, 26.676159287380607, 84.36132641769582, 
303.02238979649536, 22.85803805115341, 19.519195948128605, 22.165341963664563], 
"eval_len": [29, 222, 31, 32, 32, 210, 287, 30, 28, 31]}

 53%|█████▎    | 529999/1000000 [7:31:45<5:06:20, 25.57it/s]global step 530000, trans_decision ep_re 75.56685709241228

{"global_step": 530000, "eval_re": [23.316922027588195, 78.62152721249139, 
65.72744810951852, 24.812819622996855, 318.2342246089418, 23.360401037643978, 
21.423497902311457, 23.579389004688892, 23.97979460389735, 152.61254679404428], 
"eval_len": [31, 87, 93, 31, 154, 31, 31, 31, 30, 117]}

 54%|█████▍    | 539999/1000000 [7:40:21<4:58:07, 25.72it/s]global step 540000, trans_decision ep_re 266.63092911598324

{"global_step": 540000, "eval_re": [14.826178735959495, 19.408490155454352, 
337.93873014399685, 324.8224712454153, 845.6213949804684, 23.496652640542475, 
24.456946602169513, 1035.7122333975997, 17.37162810324206, 22.65456515498406], 
"eval_len": [27, 28, 191, 156, 309, 32, 32, 347, 27, 31]}

 55%|█████▍    | 549999/1000000 [7:49:10<4:51:57, 25.69it/s]global step 550000, trans_decision ep_re 34.21934309615243

{"global_step": 550000, "eval_re": [16.967374822937387, 20.147934450954356, 
20.61773283157165, 63.8633636244018, 19.904644562539524, 19.115593642132435, 
62.76825786837947, 17.63633241965216, 80.48825789645274, 20.683938842502794], 
"eval_len": [27, 28, 31, 104, 28, 29, 71, 30, 73, 31]}

 56%|█████▌    | 559999/1000000 [7:57:35<4:45:41, 25.67it/s]global step 560000, trans_decision ep_re 20.94948965894435

{"global_step": 560000, "eval_re": [22.514707416313662, 19.742525876857368, 
22.45679621884136, 18.29886175187561, 20.527732505251585, 20.98317894476713, 
21.436026603833646, 25.225796233280754, 17.834825631835358, 20.47444540658708], 
"eval_len": [30, 29, 29, 28, 30, 30, 30, 32, 28, 29]}

 57%|█████▋    | 569999/1000000 [8:06:20<4:38:41, 25.72it/s]global step 570000, trans_decision ep_re 165.04779089608047

{"global_step": 570000, "eval_re": [23.599300517935337, 20.6496553386843, 
20.111995105839352, 461.63753247919254, 664.7471700141223, 18.52990078279473, 
213.6682718548831, 21.196302071699485, 23.527151619087018, 182.81062917656692], 
"eval_len": [31, 29, 30, 199, 242, 28, 130, 31, 32, 121]}

 58%|█████▊    | 579998/1000000 [8:15:00<4:32:09, 25.72it/s]global step 580000, trans_decision ep_re 110.49180617856378

{"global_step": 580000, "eval_re": [22.012167945918794, 22.062377959633043, 
198.53913839023846, 22.910558772710186, 20.872731043808052, 92.67041725081685, 
203.29469389529825, 55.7676188994795, 453.843617124735, 12.94474050299968], 
"eval_len": [31, 31, 122, 30, 29, 107, 125, 94, 174, 23]}

 59%|█████▉    | 589997/1000000 [8:23:40<4:26:44, 25.62it/s]global step 590000, trans_decision ep_re 433.29706264654413

{"global_step": 590000, "eval_re": [19.200278270681945, 22.676252536289066, 
20.6249515949399, 808.3448271150862, 23.670145933294705, 1616.0105845292783, 
69.03661293803279, 21.345903648779764, 22.493749256374187, 1709.5673206426854], 
"eval_len": [28, 31, 29, 264, 30, 513, 69, 30, 31, 528]}

 60%|█████▉    | 599997/1000000 [8:32:03<4:22:36, 25.39it/s]global step 600000, trans_decision ep_re 90.14826254416855

{"global_step": 600000, "eval_re": [535.164293892582, 68.96257013842909, 
17.957666799333143, 19.5364705475832, 19.607747729858808, 15.84191758008593, 
61.95427252005034, 56.17749260724418, 87.78454751188076, 18.49564611463812], 
"eval_len": [173, 73, 30, 28, 30, 26, 69, 62, 79, 30]}

 61%|██████    | 609999/1000000 [8:40:50<4:12:04, 25.79it/s]global step 610000, trans_decision ep_re 37.0266832361716

{"global_step": 610000, "eval_re": [20.066166266331297, 22.02017702405298, 
60.910999759164596, 18.90203687015282, 20.498408665364494, 22.3174149345374, 
21.177863635915454, 22.103827857774945, 143.8098603421998, 18.4600770062222], 
"eval_len": [29, 29, 72, 28, 29, 30, 30, 30, 114, 28]}

 62%|██████▏   | 619999/1000000 [8:49:15<4:12:10, 25.12it/s]global step 620000, trans_decision ep_re 35.91112186566542

{"global_step": 620000, "eval_re": [80.78325020621818, 49.764901097582296, 
20.066802909989708, 18.565618593088118, 84.0710993226039, 21.3076046872976, 
34.51176118649892, 19.005664708848744, 16.52553250314714, 14.508983441379668], 
"eval_len": [79, 63, 30, 31, 77, 29, 46, 30, 32, 30]}

 63%|██████▎   | 629998/1000000 [8:57:52<3:56:11, 26.11it/s]global step 630000, trans_decision ep_re 192.02787873408735

{"global_step": 630000, "eval_re": [18.54615179402079, 26.009252505414594, 
20.08971675014586, 166.02946840031555, 75.19264833148729, 17.461299023916077, 
240.65628404244356, 1309.3666444750713, 24.504107763583097, 22.423214254475596],
"eval_len": [29, 32, 30, 103, 92, 28, 136, 405, 32, 31]}

 64%|██████▍   | 639999/1000000 [9:06:30<3:56:28, 25.37it/s]global step 640000, trans_decision ep_re 150.53949476536158

{"global_step": 640000, "eval_re": [43.94647028346355, 439.5336808937961, 
16.900474235270902, 240.5387123525476, 19.27623290905311, 345.60661824019905, 
15.968032081890334, 346.50539048191115, 20.228860931015543, 16.89047524446853], 
"eval_len": [55, 169, 27, 124, 29, 144, 26, 169, 31, 27]}

 65%|██████▍   | 649997/1000000 [9:15:20<3:47:04, 25.69it/s]global step 650000, trans_decision ep_re 121.09187886489822

{"global_step": 650000, "eval_re": [20.091326400638124, 195.02849078052537, 
325.44424739894004, 18.84297376129839, 24.392449307799044, 20.80441710073557, 
343.9731343303167, 76.17764143098915, 19.448683534199457, 166.71542460354033], 
"eval_len": [28, 115, 170, 29, 31, 31, 163, 79, 30, 130]}

 66%|██████▌   | 659998/1000000 [9:24:00<3:39:20, 25.84it/s]global step 660000, trans_decision ep_re 157.863769139624

{"global_step": 660000, "eval_re": [55.02940519103809, 480.8949694217935, 
19.925179508625764, 104.11360655892341, 18.988695754839025, 15.318799441406124, 
20.299287056793403, 471.4696106690196, 18.75498999327768, 373.84314780052335], 
"eval_len": [67, 185, 28, 96, 27, 28, 28, 183, 31, 179]}

 67%|██████▋   | 669999/1000000 [9:32:40<3:37:16, 25.31it/s]global step 670000, trans_decision ep_re 37.35198059692386

{"global_step": 670000, "eval_re": [15.57962665473279, 22.566018260233594, 
35.3984937252816, 53.3324660862353, 22.671692887318354, 73.37327069413696, 
57.74412267559201, 49.45783997192191, 21.884490638134487, 21.511784375651565], 
"eval_len": [27, 32, 101, 207, 30, 77, 72, 66, 32, 30]}

 68%|██████▊   | 679999/1000000 [9:41:01<3:27:37, 25.69it/s]global step 680000, trans_decision ep_re 34.85156037701857

{"global_step": 680000, "eval_re": [16.952230530288656, 74.23532272327671, 
16.15932218569991, 23.521224306127607, 75.24662170057509, 16.742804072451275, 
70.00890732367304, 20.34104064262895, 16.56477282280701, 18.743357462657404], 
"eval_len": [29, 74, 27, 31, 70, 27, 76, 30, 27, 29]}

 69%|██████▉   | 689998/1000000 [9:49:50<3:19:11, 25.94it/s]global step 690000, trans_decision ep_re 115.80671537666899

{"global_step": 690000, "eval_re": [15.346540591841284, 23.057031886324708, 
277.28718165135604, 13.584236117053877, 22.28888893888549, 240.91886384175785, 
459.28839969988803, 19.52269574166565, 67.19438587023717, 19.578929427679657], 
"eval_len": [26, 31, 135, 25, 31, 124, 186, 30, 91, 29]}

 70%|██████▉   | 699998/1000000 [9:58:12<3:13:15, 25.87it/s]global step 700000, trans_decision ep_re 168.9522415385291

{"global_step": 700000, "eval_re": [15.171082887526968, 16.820921854031962, 
358.08077703692, 522.0602360807945, 371.55389848904264, 14.387608053439111, 
148.06096197296134, 209.3438256110258, 16.912147913777485, 17.130955485771434], 
"eval_len": [27, 28, 175, 299, 222, 27, 112, 130, 26, 29]}

 71%|███████   | 709999/1000000 [10:07:00<3:08:50, 25.60it/s]global step 710000, trans_decision ep_re 142.4039957073839

{"global_step": 710000, "eval_re": [21.53927032806652, 34.86385551874781, 
12.733213119039403, 481.43269846061634, 200.8727430340817, 16.973022593729166, 
472.1762855703367, 143.1454531910284, 20.055273699316203, 20.24814155887695], 
"eval_len": [30, 45, 29, 190, 118, 30, 213, 95, 29, 29]}

 72%|███████▏  | 719998/1000000 [10:15:40<2:57:49, 26.24it/s]global step 720000, trans_decision ep_re 201.52106007314973

{"global_step": 720000, "eval_re": [200.34109755193566, 345.4178669880176, 
172.16829357921696, 68.10199419699411, 100.92954951594088, 19.538854631951523, 
163.5231456030397, 323.13603590160443, 144.55205908887046, 477.50170367392565], 
"eval_len": [118, 155, 109, 73, 87, 29, 99, 128, 122, 191]}

 73%|███████▎  | 729997/1000000 [10:24:03<2:56:26, 25.51it/s]global step 730000, trans_decision ep_re 33.04548480280499

{"global_step": 730000, "eval_re": [50.82391565951945, 68.51126098650676, 
18.983707227572705, 22.253507511020903, 19.151870073750032, 70.33984354834129, 
14.91838990063826, 21.753430404131205, 21.076317983218775, 22.64260473335054], 
"eval_len": [131, 108, 29, 29, 28, 158, 27, 31, 29, 29]}

 74%|███████▍  | 739999/1000000 [10:32:50<2:50:38, 25.39it/s]global step 740000, trans_decision ep_re 27.953930131460073

{"global_step": 740000, "eval_re": [24.42083483795213, 21.517872588368583, 
23.01962467127747, 14.70737928804372, 19.42113396961775, 16.902561725457222, 
22.63960198275299, 99.03760763514698, 17.391315616234753, 20.481368999749122], 
"eval_len": [31, 30, 30, 29, 29, 27, 30, 99, 28, 28]}

 75%|███████▍  | 749999/1000000 [10:41:30<2:44:01, 25.40it/s]global step 750000, trans_decision ep_re 142.5312013040625

{"global_step": 750000, "eval_re": [61.954019076468185, 788.5923228970892, 
35.2550711480724, 20.82561150521134, 20.02271635100501, 197.51675896089196, 
101.73922337796326, 21.77126304023273, 169.44550160644386, 8.189525077247225], 
"eval_len": [70, 275, 42, 30, 29, 133, 101, 30, 110, 22]}

 76%|███████▌  | 759998/1000000 [10:49:52<2:34:09, 25.95it/s]global step 760000, trans_decision ep_re 115.15373990914684

{"global_step": 760000, "eval_re": [19.5451309293744, 16.350525984694606, 
865.0506679166534, 19.65265995475383, 17.835880107028014, 22.561902018123963, 
133.9742839242753, 18.62190852033305, 18.172345971339094, 19.77209376489263], 
"eval_len": [29, 28, 279, 29, 28, 30, 110, 30, 29, 29]}

 77%|███████▋  | 769997/1000000 [10:58:40<2:30:14, 25.51it/s]global step 770000, trans_decision ep_re 127.05471382119781

{"global_step": 770000, "eval_re": [131.55848564661602, 15.463198161384312, 
237.5777770014033, 51.16496071125866, 88.34505091058206, 19.867940623195462, 
21.395476890119706, 17.962098365154755, 556.5230981527934, 130.68905174947034], 
"eval_len": [117, 27, 163, 61, 119, 30, 29, 30, 207, 106]}

 78%|███████▊  | 779999/1000000 [11:07:20<2:22:33, 25.72it/s]global step 780000, trans_decision ep_re 181.31302814903535

{"global_step": 780000, "eval_re": [18.07060759810213, 22.957039267381255, 
170.17438829345545, 26.04617727074798, 20.628977929621065, 20.763751176327247, 
21.521551006443012, 21.909770269370824, 1474.2737703171795, 16.784248361724984],
"eval_len": [28, 32, 115, 32, 29, 29, 30, 30, 441, 29]}

 79%|███████▉  | 789998/1000000 [11:15:43<2:15:49, 25.77it/s]global step 790000, trans_decision ep_re 23.32201927460178

{"global_step": 790000, "eval_re": [55.02089430593229, 19.542337030789977, 
16.67722474288278, 17.23980040648803, 21.59877329969736, 19.615765779095042, 
22.657464173136308, 18.995250747164135, 20.35793211655578, 21.5147501442761], 
"eval_len": [69, 30, 28, 27, 31, 31, 30, 28, 29, 32]}

 80%|███████▉  | 799997/1000000 [11:24:30<2:09:58, 25.65it/s]global step 800000, trans_decision ep_re 63.668267511226205

{"global_step": 800000, "eval_re": [17.798597757747242, 19.739876316913495, 
22.795145020751107, 43.93859196014502, 17.57456938404707, 25.68027872513282, 
17.02928735996131, 15.363546462785257, 432.31221595845204, 24.450566166326645], 
"eval_len": [29, 28, 30, 54, 28, 32, 29, 26, 168, 31]}

 81%|████████  | 809997/1000000 [11:32:54<2:04:08, 25.51it/s]global step 810000, trans_decision ep_re 128.28589234311642

{"global_step": 810000, "eval_re": [124.92011382155748, 21.703805134205545, 
19.10245525651559, 19.875049784790665, 23.856116205553825, 21.863425534050208, 
20.471026367454325, 294.67248619947816, 720.0201646422393, 16.37428048531916], 
"eval_len": [79, 31, 29, 30, 32, 29, 31, 148, 293, 27]}

 82%|████████▏ | 819999/1000000 [11:41:31<1:57:01, 25.64it/s]global step 820000, trans_decision ep_re 87.3541770332963

{"global_step": 820000, "eval_re": [21.377130491445808, 682.6116804433893, 
19.594941240977143, 17.23517017956297, 22.038143724541687, 21.96006657301783, 
25.668518589032356, 21.31399652247285, 19.65010196328303, 22.092020605240027], 
"eval_len": [31, 232, 28, 28, 31, 29, 31, 29, 28, 29]}

 83%|████████▎ | 829998/1000000 [11:50:20<1:50:01, 25.75it/s]global step 830000, trans_decision ep_re 21.323926096874924

{"global_step": 830000, "eval_re": [26.43541317542694, 19.750854748741872, 
21.201740351926617, 19.963929298064922, 22.673596882167057, 21.80526714087626, 
22.14306171312149, 17.155272104819836, 21.432753772828924, 20.677371780775296], 
"eval_len": [32, 30, 29, 29, 31, 31, 31, 29, 30, 29]}

 84%|████████▍ | 839998/1000000 [11:58:42<1:43:35, 25.74it/s]global step 840000, trans_decision ep_re 44.525255283428656

{"global_step": 840000, "eval_re": [22.560906421629134, 17.536885472423343, 
23.46858453961882, 23.464652767223917, 214.8425210446397, 22.260844329891434, 
22.847136825363727, 22.49739738367932, 20.578176973287544, 55.19544707652959], 
"eval_len": [31, 27, 31, 31, 132, 30, 31, 31, 30, 58]}

 85%|████████▍ | 849997/1000000 [12:07:30<1:38:17, 25.43it/s]global step 850000, trans_decision ep_re 18.844755183591428

{"global_step": 850000, "eval_re": [21.340252114992637, 14.399328466829422, 
18.123776510456434, 21.974936676927655, 23.036300001294503, 22.934357020082835, 
10.81561358533645, 20.830803869355577, 21.522230949912277, 13.469952640726493], 
"eval_len": [30, 25, 29, 31, 30, 32, 22, 29, 32, 29]}

 86%|████████▌ | 859998/1000000 [12:15:53<1:30:34, 25.76it/s]global step 860000, trans_decision ep_re 81.40888540896225

{"global_step": 860000, "eval_re": [121.93260472985514, 17.304488373468654, 
176.2196101838671, 245.34010274378136, 22.115857112840644, 107.04177404839069, 
20.138176132532102, 63.00212398635429, 17.40740217957194, 23.58671459896062], 
"eval_len": [114, 28, 150, 141, 31, 96, 30, 105, 28, 31]}

 87%|████████▋ | 869999/1000000 [12:24:30<1:25:04, 25.47it/s]global step 870000, trans_decision ep_re 29.405899261339442

{"global_step": 870000, "eval_re": [20.144701493977095, 22.201100693415224, 
11.507149787408105, 22.011539505059016, 17.580717426118685, 53.677590526491784, 
86.63719151146184, 23.23243049978677, 22.006065405161262, 15.060505764514604], 
"eval_len": [31, 30, 25, 30, 28, 61, 80, 31, 31, 29]}

 88%|████████▊ | 879999/1000000 [12:33:20<1:17:20, 25.86it/s]global step 880000, trans_decision ep_re 56.68029605037991

{"global_step": 880000, "eval_re": [22.264832601451378, 17.403009816667907, 
19.836368325185813, 64.52021104438924, 81.20034041372982, 17.463314162541963, 
178.84455688298831, 21.774089343060368, 20.344264992326643, 123.15197292145767],
"eval_len": [31, 27, 31, 73, 80, 28, 113, 30, 30, 97]}

 89%|████████▉ | 889998/1000000 [12:41:42<1:10:30, 26.00it/s]global step 890000, trans_decision ep_re 28.600409013205656

{"global_step": 890000, "eval_re": [23.436837064303823, 21.459316521497588, 
28.14787560296311, 59.92914904766739, 23.70779630713913, 35.3617909414888, 
35.64531146186788, 22.499007729814807, 11.321459145836709, 24.495546309477323], 
"eval_len": [30, 29, 41, 64, 32, 43, 41, 30, 23, 32]}

 90%|████████▉ | 899997/1000000 [12:50:30<1:04:55, 25.67it/s]global step 900000, trans_decision ep_re 207.69636202283783

{"global_step": 900000, "eval_re": [234.4146304729199, 47.117365977351305, 
165.6008679786331, 19.517973452251837, 19.32577991735947, 238.2434182732031, 
19.52753708595929, 1160.591464557644, 15.535168269401174, 157.0894142436548], 
"eval_len": [126, 55, 117, 29, 29, 135, 28, 325, 28, 100]}

 91%|█████████ | 909999/1000000 [12:59:10<58:25, 25.67it/s]global step 910000, trans_decision ep_re 100.38228470707533

{"global_step": 910000, "eval_re": [20.66178166076284, 22.59558270516794, 
215.16579409754482, 20.060177467465614, 20.070278219796347, 565.8820317842012, 
77.93855345493331, 21.052404385395604, 19.38821172541228, 21.008031570073264], 
"eval_len": [30, 31, 160, 30, 28, 210, 99, 29, 28, 29]}

 92%|█████████▏| 919998/1000000 [13:07:32<51:25, 25.92it/s]global step 920000, trans_decision ep_re 65.85055037460802

{"global_step": 920000, "eval_re": [21.068243118560073, 398.01452791246305, 
21.248792985468853, 33.455223384033125, 58.87771097435313, 16.350178557869775, 
43.569650162555504, 19.95964027327275, 24.808439194148995, 21.153097183354998], 
"eval_len": [31, 216, 29, 54, 73, 31, 59, 29, 31, 31]}

 93%|█████████▎| 929999/1000000 [13:16:20<45:37, 25.57it/s]global step 930000, trans_decision ep_re 204.23174033163386

{"global_step": 930000, "eval_re": [15.381239866047157, 412.7838050167297, 
142.8714144918152, 193.36768503860364, 333.13865216618024, 64.00567054708443, 
789.473323087534, 16.95554430976788, 55.890530960805, 18.449537831771295], 
"eval_len": [27, 173, 112, 109, 141, 96, 272, 27, 66, 28]}

 94%|█████████▍| 939998/1000000 [13:25:00<38:46, 25.79it/s]global step 940000, trans_decision ep_re 39.35657631603029

{"global_step": 940000, "eval_re": [65.32879576491469, 51.38949364766693, 
18.291792076262766, 65.03992586695709, 21.952499079523083, 16.350865968082886, 
16.814376453939722, 47.768872002564144, 56.16194748487548, 34.46719481551612], 
"eval_len": [65, 62, 29, 73, 31, 31, 27, 60, 73, 41]}

 95%|█████████▍| 949997/1000000 [13:33:24<32:47, 25.41it/s]global step 950000, trans_decision ep_re 57.403597986751834

{"global_step": 950000, "eval_re": [287.28402388383745, 21.193946216519095, 
14.421325267900203, 20.67023989949574, 131.25837545215845, 17.153420427451348, 
20.150427430755293, 20.024107294176652, 19.50135544883655, 22.37875854638758], 
"eval_len": [153, 31, 27, 30, 106, 28, 31, 30, 29, 31]}

 96%|█████████▌| 959999/1000000 [13:42:01<26:12, 25.44it/s]global step 960000, trans_decision ep_re 69.82389943442344

{"global_step": 960000, "eval_re": [149.09017998587493, 19.530599117070487, 
20.052812749857626, 22.375881374432904, 22.069388542070953, 23.96463073418312, 
19.164246523436656, 380.3873776128808, 21.930692352380515, 19.67318535204631], 
"eval_len": [141, 29, 30, 30, 29, 31, 29, 157, 29, 31]}

 97%|█████████▋| 969998/1000000 [13:50:50<19:22, 25.81it/s]global step 970000, trans_decision ep_re 34.57528249959328

{"global_step": 970000, "eval_re": [19.657272155341772, 16.508334937592338, 
18.59217727361567, 66.05313873747278, 21.713677336453706, 21.9990096649848, 
18.41251070208539, 52.46397028093324, 11.79648306149489, 98.55625084595816], 
"eval_len": [31, 26, 30, 62, 30, 30, 30, 58, 29, 79]}

 98%|█████████▊| 979997/1000000 [13:59:30<13:00, 25.63it/s]global step 980000, trans_decision ep_re 270.3008909578721

{"global_step": 980000, "eval_re": [328.87848005991395, 19.15491791034508, 
112.13846068130849, 161.62489359239672, 85.59533971922134, 62.50732300180715, 
114.14698943317657, 282.4572081261454, 857.7130211356695, 678.7922759187372], 
"eval_len": [172, 30, 106, 124, 78, 75, 99, 126, 254, 227]}

 99%|█████████▉| 989998/1000000 [14:07:52<06:24, 25.98it/s]global step 990000, trans_decision ep_re 293.1275675656823

{"global_step": 990000, "eval_re": [475.8268796423125, 22.373919168132474, 
506.8217992342642, 98.70095676589591, 959.2400645508056, 19.78826134302016, 
11.857951218368356, 790.0210711831259, 21.894613145927917, 24.75015940497012], 
"eval_len": [251, 30, 193, 88, 296, 29, 22, 262, 31, 32]}

100%|█████████▉| 999999/1000000 [14:16:31<00:00, 25.77it/s]global step 1000000, trans_decision ep_re 65.83996418370616

{"global_step": 1000000, "eval_re": [20.979755356218543, 50.49625237139156, 
12.60542613879941, 63.56978546652789, 19.566551907089153, 18.54941992951229, 
25.71870413334103, 409.7926192870368, 15.661123945746548, 21.460003301398384], 
"eval_len": [30, 68, 29, 64, 28, 29, 31, 183, 30, 30]}

100%|██████████| 1000000/1000000 [14:16:45<00:00, 19.45it/s]
