
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.25
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9997/1000000 [04:12<10:27:38, 26.29it/s]global step 10000, trans_decision ep_re 39.80352134461497

{"global_step": 10000, "eval_re": [89.97163492323126, 11.082364365101046, 
19.212540117074344, 21.822817119970146, 48.126525003744455, 73.68993548876865, 
8.924289281910264, 12.587686371250827, 38.85758927454961, 73.75983150054917], 
"eval_len": [58, 19, 25, 23, 48, 52, 18, 14, 35, 110]}

  2%|▏         | 19999/1000000 [12:50<10:23:33, 26.19it/s]global step 20000, trans_decision ep_re 48.03592244465306

{"global_step": 20000, "eval_re": [82.5004344042926, 65.06207122700712, 
80.25427322280431, 12.160682569430014, 73.54019139338145, 8.955272385347039, 
65.38491249243384, 66.1552832818029, 9.631509283159536, 16.714594186871828], 
"eval_len": [51, 73, 47, 14, 55, 18, 39, 40, 25, 21]}

  3%|▎         | 29999/1000000 [21:20<10:16:29, 26.22it/s]global step 30000, trans_decision ep_re 32.97438204292472

{"global_step": 30000, "eval_re": [179.235286517104, 23.253733710118517, 
12.00792590774725, 50.93084944856036, 10.270259152260724, 7.476013573090714, 
12.36672802581075, 11.749205457684525, 10.60878596045093, 11.845032676419393], 
"eval_len": [146, 24, 14, 61, 14, 15, 24, 20, 24, 16]}

  4%|▍         | 39999/1000000 [29:50<10:16:56, 25.93it/s]global step 40000, trans_decision ep_re 49.98424301381145

{"global_step": 40000, "eval_re": [121.32598516524057, 11.617249100414433, 
76.7572056013323, 152.84611965714953, 32.81520588571815, 10.04331225096559, 
43.434613550585546, 18.04791155499051, 12.93284414761676, 20.02198322410111], 
"eval_len": [101, 17, 54, 90, 27, 13, 34, 18, 20, 20]}

  5%|▍         | 49999/1000000 [38:05<10:09:06, 25.99it/s]global step 50000, trans_decision ep_re 22.151107871795517

{"global_step": 50000, "eval_re": [18.244841750847293, 90.13480484086908, 
14.963354669411803, 13.098400387210322, 16.52240754500748, 16.512714178286497, 
11.279189098606397, 18.28944162521382, 8.358394680284274, 14.107529942218216], 
"eval_len": [35, 60, 20, 19, 27, 23, 16, 20, 10, 16]}

  6%|▌         | 59997/1000000 [46:32<10:02:06, 26.02it/s]global step 60000, trans_decision ep_re 29.680453055838314

{"global_step": 60000, "eval_re": [101.93298199604818, 86.4772260534206, 
22.634894571293646, 6.399012570453265, 19.1459337113855, 12.915371353423511, 
20.01729254674123, 9.062805802730972, 9.13326391580172, 9.085748037084496], 
"eval_len": [58, 66, 23, 10, 27, 15, 20, 12, 11, 13]}

  7%|▋         | 69999/1000000 [55:10<9:57:30, 25.94it/s]global step 70000, trans_decision ep_re 26.295543646275085

{"global_step": 70000, "eval_re": [14.505168029150893, 132.10900376592454, 
16.538790966429133, 13.864121273951044, 16.871913939012675, 19.95200504287334, 
9.045317785924444, 23.5135234805006, 10.220010310913185, 6.335581868071016], 
"eval_len": [21, 123, 18, 22, 20, 22, 11, 28, 12, 18]}

  8%|▊         | 79999/1000000 [1:03:40<9:48:59, 26.03it/s]global step 80000, trans_decision ep_re 40.39517396521705

{"global_step": 80000, "eval_re": [101.83958919352536, 20.493104371130357, 
87.73738793976675, 12.940411281031665, 16.148590907429607, 21.433228641239705, 
29.229894651498487, 69.49402371344684, 14.25372276525229, 30.381786187849478], 
"eval_len": [69, 26, 68, 15, 27, 25, 30, 52, 16, 24]}

  9%|▉         | 89998/1000000 [1:12:10<9:36:14, 26.32it/s]global step 90000, trans_decision ep_re 75.39449566606066

{"global_step": 90000, "eval_re": [65.51948265530757, 82.38948573739778, 
9.68383532286981, 188.2525849820392, 67.65665128244329, 15.282422078990903, 
70.23430878411553, 136.98191431408122, 105.80199666308404, 12.14227484027727], 
"eval_len": [52, 51, 19, 103, 41, 18, 58, 75, 67, 14]}

 10%|▉         | 99998/1000000 [1:20:40<9:30:35, 26.29it/s]global step 100000, trans_decision ep_re 31.454333273938442

{"global_step": 100000, "eval_re": [9.579681251157027, 11.486148376870482, 
62.03140134685773, 16.417510755866587, 9.666303580088632, 10.612093378826497, 
74.13385734027685, 13.040038692499902, 96.81450228787422, 10.761795729066561], 
"eval_len": [18, 26, 74, 21, 13, 16, 71, 19, 67, 15]}

 11%|█         | 109997/1000000 [1:28:54<9:30:02, 26.02it/s]global step 110000, trans_decision ep_re 33.813274610041276

{"global_step": 110000, "eval_re": [14.248736001444664, 9.23496051621142, 
13.749860001458071, 86.57429821916459, 11.148480933865637, 8.850601612784473, 
31.493049252067514, 122.93769878380697, 21.759219584570925, 18.13584119503852], 
"eval_len": [16, 12, 21, 81, 15, 14, 29, 82, 21, 22]}

 12%|█▏        | 119999/1000000 [1:37:22<9:21:58, 26.10it/s]global step 120000, trans_decision ep_re 38.3468455503834

{"global_step": 120000, "eval_re": [18.071441061654994, 26.44976968974308, 
9.419175920470952, 80.46658208537174, 93.4793132896324, 17.725280614848472, 
12.16544537756566, 9.031032582418591, 15.329606951477778, 101.33080793065034], 
"eval_len": [21, 62, 14, 48, 53, 25, 13, 13, 17, 101]}

 13%|█▎        | 129999/1000000 [1:46:00<9:19:53, 25.90it/s]global step 130000, trans_decision ep_re 56.11421606811946

{"global_step": 130000, "eval_re": [15.266414759568702, 12.08938189472845, 
12.685031628798075, 140.80460674078407, 17.843578192710613, 14.32725320234313, 
14.139678943435682, 177.8267147714935, 147.24418150152755, 8.915319045804875], 
"eval_len": [20, 19, 17, 105, 17, 15, 24, 108, 79, 12]}

 14%|█▍        | 139999/1000000 [1:54:20<9:07:32, 26.18it/s]global step 140000, trans_decision ep_re 28.534793855451845

{"global_step": 140000, "eval_re": [11.178768171330644, 10.835309356527679, 
19.58056232584654, 88.2317538043006, 9.019502048550375, 12.325313892836189, 
15.182762130093558, 11.741854028123834, 12.775049167505513, 94.47706362940355], 
"eval_len": [17, 15, 26, 66, 11, 18, 16, 28, 14, 73]}

 15%|█▍        | 149998/1000000 [2:03:00<8:57:20, 26.36it/s]global step 150000, trans_decision ep_re 47.3420949127748

{"global_step": 150000, "eval_re": [8.525777957953103, 16.759376429444472, 
17.725662409435532, 151.67911608668217, 68.54732652767845, 18.814793392479267, 
77.41599757651761, 22.770834622418196, 13.291446791489593, 77.89061733364964], 
"eval_len": [12, 19, 18, 86, 48, 22, 51, 23, 19, 56]}

 16%|█▌        | 159997/1000000 [2:11:15<8:54:40, 26.18it/s]global step 160000, trans_decision ep_re 64.68039031834968

{"global_step": 160000, "eval_re": [76.10196231349275, 87.02697235053112, 
17.282036287958057, 6.334485851764033, 92.97076148915664, 141.2596437853125, 
9.640382962069031, 19.803746179092013, 12.11064176070269, 184.27327020341798], 
"eval_len": [56, 57, 18, 26, 65, 104, 16, 22, 22, 197]}

 17%|█▋        | 169997/1000000 [2:19:42<8:44:42, 26.36it/s]global step 170000, trans_decision ep_re 75.60727189818749

{"global_step": 170000, "eval_re": [28.82516058384066, 18.14712716032374, 
95.57453602926786, 17.292234483455704, 334.50183752439585, 11.531854636671268, 
108.28170765744639, 11.279232345488769, 122.62051483965428, 8.01851372133042], 
"eval_len": [23, 20, 71, 22, 155, 17, 83, 14, 87, 15]}

 18%|█▊        | 179999/1000000 [2:28:20<8:42:37, 26.15it/s]global step 180000, trans_decision ep_re 18.23652803069775

{"global_step": 180000, "eval_re": [19.75323217548372, 7.351356883586707, 
21.763098162878066, 14.873459200175043, 9.048572131237526, 7.064744935457888, 
72.4737578341946, 8.459702289746213, 9.398228473636127, 12.179128220581658], 
"eval_len": [20, 10, 20, 24, 12, 23, 60, 11, 11, 15]}

 19%|█▉        | 189997/1000000 [2:36:34<8:36:23, 26.14it/s]global step 190000, trans_decision ep_re 77.1408638528731

{"global_step": 190000, "eval_re": [102.8346887833046, 14.346744295339558, 
96.29202267914798, 101.7542214684774, 17.870248893646366, 234.53513740278888, 
8.6809531596546, 13.896404249152997, 162.65894309829704, 18.539274498921557], 
"eval_len": [98, 20, 72, 67, 18, 154, 15, 24, 143, 22]}

 20%|█▉        | 199999/1000000 [2:45:10<8:25:43, 26.37it/s]global step 200000, trans_decision ep_re 17.171869496478102

{"global_step": 200000, "eval_re": [9.854032186651107, 11.145375288998098, 
8.547821503158826, 13.641547169056333, 18.648626339676362, 55.0497948719695, 
9.167375499198078, 12.645735795062855, 19.362354161174743, 13.656032149835093], 
"eval_len": [18, 14, 24, 16, 18, 77, 11, 20, 44, 26]}

 21%|██        | 209999/1000000 [2:53:23<8:22:56, 26.18it/s]global step 210000, trans_decision ep_re 40.03666235306517

{"global_step": 210000, "eval_re": [27.970626612025598, 6.408455508687477, 
70.37561815216466, 16.959586860515607, 18.71620916300685, 108.4293058960862, 
10.453178684981598, 100.70711682173422, 11.656029545681626, 28.69049628576786], 
"eval_len": [36, 9, 58, 21, 27, 83, 15, 88, 16, 32]}

 22%|██▏       | 219998/1000000 [3:02:00<8:15:10, 26.25it/s]global step 220000, trans_decision ep_re 72.9802461222414

{"global_step": 220000, "eval_re": [63.7490162795468, 20.576024954643927, 
19.36602516628361, 189.96611871047378, 270.09691751509206, 11.542215196009616, 
11.223616249326707, 124.54923868606643, 10.08215903293625, 8.651129432034775], 
"eval_len": [41, 20, 24, 122, 126, 14, 28, 82, 13, 15]}

 23%|██▎       | 229997/1000000 [3:10:15<8:10:59, 26.14it/s]global step 230000, trans_decision ep_re 24.03810690173613

{"global_step": 230000, "eval_re": [8.467236316349384, 15.498877805829212, 
29.358063127739474, 121.80872708014813, 12.001456475977456, 11.418201448020891, 
9.807967516519874, 7.952563130545883, 11.247939689899104, 12.820036426331889], 
"eval_len": [12, 16, 27, 84, 14, 21, 16, 10, 20, 17]}

 24%|██▍       | 239999/1000000 [3:18:50<7:58:28, 26.47it/s]global step 240000, trans_decision ep_re 50.24585690829465

{"global_step": 240000, "eval_re": [93.91561471963688, 17.83994339214786, 
95.98445444683671, 70.74383803680665, 84.50975487677964, 77.76685012010519, 
14.936344720305286, 10.521862240714835, 15.224212393483521, 21.01569413612993], 
"eval_len": [72, 19, 78, 55, 69, 68, 16, 15, 17, 22]}

 25%|██▍       | 249999/1000000 [3:27:10<7:52:48, 26.44it/s]global step 250000, trans_decision ep_re 25.824760541299412

{"global_step": 250000, "eval_re": [21.78919327938691, 11.969781001199236, 
15.32255632422531, 105.39599701355394, 14.992243265153864, 21.653658442064412, 
14.449191321273304, 13.611547883976717, 19.11855321231869, 19.944883669841765], 
"eval_len": [20, 27, 19, 66, 20, 23, 15, 17, 26, 23]}

 26%|██▌       | 259999/1000000 [3:35:30<7:46:15, 26.45it/s]global step 260000, trans_decision ep_re 36.23328251010039

{"global_step": 260000, "eval_re": [14.396597601172243, 8.51077576723729, 
10.724520773730642, 61.64174510855778, 10.759693320621881, 91.95302183450825, 
20.920518764006154, 16.068969271960015, 11.877535645400407, 115.47944701380924],
"eval_len": [20, 10, 16, 43, 19, 73, 21, 38, 19, 64]}

 27%|██▋       | 269999/1000000 [3:43:40<7:39:19, 26.49it/s]global step 270000, trans_decision ep_re 29.241815710188234

{"global_step": 270000, "eval_re": [87.2544971199498, 15.25715551232628, 
11.015191313006877, 9.24956179116584, 12.723758316105762, 93.96960316892566, 
16.560996799710477, 24.00433004058605, 13.834724436362857, 8.548338603742742], 
"eval_len": [53, 23, 29, 11, 17, 78, 24, 21, 20, 13]}

 28%|██▊       | 279998/1000000 [3:52:03<7:24:53, 26.97it/s]global step 280000, trans_decision ep_re 14.774554605286994

{"global_step": 280000, "eval_re": [15.982547324283212, 8.521245428097274, 
18.17356678970267, 17.053519038027037, 18.104893721237328, 17.08985600082427, 
12.477999667901164, 12.53751282352309, 15.03320492896121, 12.771200330312686], 
"eval_len": [17, 12, 29, 18, 20, 27, 15, 24, 20, 19]}

 29%|██▉       | 289997/1000000 [4:00:23<7:28:03, 26.41it/s]global step 290000, trans_decision ep_re 34.11082336348046

{"global_step": 290000, "eval_re": [22.658414604928005, 103.86732527517677, 
19.819584006731496, 120.30573027945725, 20.643003829558065, 9.660334127743766, 
10.281395118530638, 10.694779843817216, 10.445577618489418, 12.732088930372042],
"eval_len": [24, 60, 21, 104, 27, 12, 25, 12, 15, 16]}

 30%|██▉       | 299999/1000000 [4:08:46<7:21:56, 26.40it/s]global step 300000, trans_decision ep_re 39.82410542268587

{"global_step": 300000, "eval_re": [145.71921313222725, 11.99058847917601, 
19.20669675864306, 14.085856304825231, 72.56430988576811, 7.413017444712714, 
64.5310550939925, 10.222067537008385, 22.166153346158602, 30.342096244346802], 
"eval_len": [80, 14, 20, 19, 54, 10, 53, 12, 23, 41]}

 31%|███       | 309998/1000000 [4:17:06<7:08:11, 26.86it/s]global step 310000, trans_decision ep_re 31.302971398030667

{"global_step": 310000, "eval_re": [26.359018774238557, 27.973485732360967, 
8.016382685416296, 72.4697526534917, 16.581562620480828, 114.36656785824222, 
6.983275371221208, 13.30769847425551, 12.26850610034128, 14.703463710258163], 
"eval_len": [26, 25, 10, 46, 18, 67, 10, 21, 18, 22]}

 32%|███▏      | 319999/1000000 [4:25:40<7:10:11, 26.34it/s]global step 320000, trans_decision ep_re 50.569122087757215

{"global_step": 320000, "eval_re": [7.664715987338955, 21.789558817472223, 
118.74438343106749, 138.36848301498577, 14.154574858653953, 156.28860884784325, 
11.769790707792016, 12.027796824911823, 11.505712419192903, 13.377595968313809],
"eval_len": [10, 24, 63, 87, 16, 117, 28, 15, 16, 16]}

 33%|███▎      | 329999/1000000 [4:34:00<7:03:11, 26.39it/s]global step 330000, trans_decision ep_re 62.86428801941779

{"global_step": 330000, "eval_re": [103.69345875427781, 146.82808382031595, 
9.4045753104369, 9.581856413916563, 193.4543590429672, 18.64456041415288, 
108.69610067967852, 19.581689259777992, 10.37284380101907, 8.385352697635087], 
"eval_len": [75, 81, 15, 29, 106, 21, 85, 21, 16, 14]}

 34%|███▍      | 339999/1000000 [4:42:11<6:56:31, 26.41it/s]global step 340000, trans_decision ep_re 34.404931516866384

{"global_step": 340000, "eval_re": [138.66327887115958, 15.686597451787973, 
11.045517329074887, 17.511731616808135, 8.279705704360275, 17.36447723541948, 
13.521653759275406, 96.28133381001604, 10.755159678927187, 14.9398597118348], 
"eval_len": [80, 22, 21, 23, 11, 18, 14, 83, 18, 16]}

 35%|███▍      | 349998/1000000 [4:50:33<6:43:05, 26.88it/s]global step 350000, trans_decision ep_re 31.321200681542848

{"global_step": 350000, "eval_re": [118.08855064402445, 7.357993402164743, 
11.477440526455423, 10.536191426973442, 101.45791200581853, 12.828583893002744, 
7.994149057447778, 10.015029911199509, 15.905168774785327, 17.55098717355652], 
"eval_len": [92, 13, 18, 12, 63, 18, 19, 17, 20, 41]}

 36%|███▌      | 359997/1000000 [4:58:56<6:46:35, 26.23it/s]global step 360000, trans_decision ep_re 25.368886638578132

{"global_step": 360000, "eval_re": [24.00232959604373, 12.053489579148943, 
11.311309899144353, 8.650970756634182, 9.835462536687489, 37.6744570329353, 
12.988126741933957, 100.61132255391962, 10.25460903948005, 26.30678864985369], 
"eval_len": [27, 17, 16, 11, 18, 68, 22, 64, 15, 28]}

 37%|███▋      | 369999/1000000 [5:07:30<6:37:49, 26.39it/s]global step 370000, trans_decision ep_re 35.82147640075868

{"global_step": 370000, "eval_re": [28.654267094191873, 28.7025063947673, 
14.275835520731585, 95.91246363991394, 13.105000459041793, 17.28830246512712, 
16.547258110934585, 94.78049917263625, 10.841799848173421, 38.106831302068905], 
"eval_len": [25, 56, 23, 205, 17, 21, 24, 83, 13, 30]}

 38%|███▊      | 379999/1000000 [5:15:42<6:32:07, 26.35it/s]global step 380000, trans_decision ep_re 30.48041898115655

{"global_step": 380000, "eval_re": [66.15436125357532, 16.559163122726364, 
23.97085462522721, 13.45100246500671, 14.922610808744023, 18.15906123531898, 
79.83017383792205, 14.620701929123706, 48.43082061135187, 8.705439922569283], 
"eval_len": [48, 23, 24, 20, 17, 20, 71, 19, 52, 21]}

 39%|███▉      | 389998/1000000 [5:24:05<6:19:41, 26.78it/s]global step 390000, trans_decision ep_re 33.768459256767514

{"global_step": 390000, "eval_re": [64.88323258390199, 12.111722586774519, 
16.90658107616263, 19.19722973538735, 19.559762303081115, 10.933687206506331, 
19.639435938510932, 143.59258524575296, 12.2795466376857, 18.580809253911625], 
"eval_len": [42, 21, 22, 19, 19, 17, 21, 109, 20, 19]}

 40%|███▉      | 399997/1000000 [5:32:40<6:19:47, 26.33it/s]global step 400000, trans_decision ep_re 53.927800831992116

{"global_step": 400000, "eval_re": [12.757926854273924, 45.57769560115061, 
21.926840034263787, 66.72001570931924, 119.27126558612716, 41.187399444920985, 
192.34275451264332, 11.846546856881183, 10.45741450788956, 17.190149212451264], 
"eval_len": [17, 49, 26, 66, 79, 40, 119, 14, 14, 17]}

 41%|████      | 409999/1000000 [5:40:52<6:14:07, 26.28it/s]global step 410000, trans_decision ep_re 33.041861701274556

{"global_step": 410000, "eval_re": [12.729332733741131, 12.669329162437785, 
11.104159763331358, 12.129410725929722, 110.01480288719515, 20.747500129948115, 
15.6935542122779, 10.267177640489553, 107.72737987654381, 17.33596988085098], 
"eval_len": [19, 16, 15, 21, 66, 28, 19, 12, 93, 23]}

 42%|████▏     | 419998/1000000 [5:49:13<6:01:03, 26.77it/s]global step 420000, trans_decision ep_re 22.136900753841406

{"global_step": 420000, "eval_re": [78.11608276831664, 14.740485722628069, 
17.885956667176625, 15.855699402217379, 12.254931995884114, 20.616897484048515, 
12.335947177623137, 16.277165070182562, 17.60946989352247, 15.676371356814553], 
"eval_len": [60, 26, 18, 17, 19, 19, 14, 22, 24, 19]}

 43%|████▎     | 429997/1000000 [5:57:34<5:57:58, 26.54it/s]global step 430000, trans_decision ep_re 23.57907846306606

{"global_step": 430000, "eval_re": [14.356912844196549, 13.760190784818164, 
21.28376126244584, 10.628316227038901, 10.449672341245883, 102.42549504714319, 
23.868072460426294, 15.102470728528619, 17.51727716190499, 6.398615772912174], 
"eval_len": [27, 18, 23, 13, 15, 86, 30, 21, 19, 12]}

 44%|████▍     | 439999/1000000 [6:05:55<5:52:35, 26.47it/s]global step 440000, trans_decision ep_re 46.83036493244772

{"global_step": 440000, "eval_re": [12.150209051657137, 18.17247004373898, 
12.991673131903575, 15.475493334872082, 15.662550779279071, 65.77447636634871, 
62.232403442359974, 89.05428725417246, 9.445516788590574, 167.34456913155464], 
"eval_len": [14, 26, 17, 16, 19, 47, 60, 66, 14, 99]}

 45%|████▍     | 449999/1000000 [6:14:30<5:48:05, 26.33it/s]global step 450000, trans_decision ep_re 52.22858659051972

{"global_step": 450000, "eval_re": [15.273472011339246, 107.33474961113305, 
9.353378503758721, 207.6649077454045, 11.031544490398666, 123.74314190585991, 
12.660826364849655, 11.22839209352148, 11.481308814765764, 12.51414436416615], 
"eval_len": [33, 74, 14, 120, 15, 105, 18, 12, 16, 18]}

 46%|████▌     | 459999/1000000 [6:22:50<5:40:44, 26.41it/s]global step 460000, trans_decision ep_re 19.49969227725243

{"global_step": 460000, "eval_re": [9.608591528422725, 69.98554883601977, 
16.452526890183158, 12.897963144846031, 11.833964431524928, 20.428994323198854, 
14.532242873372788, 8.65506877801563, 15.478253526672159, 15.12376844026825], 
"eval_len": [22, 71, 25, 20, 19, 21, 26, 11, 16, 17]}

 47%|████▋     | 469999/1000000 [6:31:10<5:34:39, 26.39it/s]global step 470000, trans_decision ep_re 22.078476446198266

{"global_step": 470000, "eval_re": [19.600372979155047, 9.153994506166345, 
12.1635307671805, 11.866036818646485, 13.150349863584236, 8.632454846262364, 
7.132749364761732, 13.93097883533353, 17.211361852260744, 107.94293462863165], 
"eval_len": [26, 11, 16, 14, 26, 12, 13, 19, 20, 69]}

 48%|████▊     | 479999/1000000 [6:39:20<5:28:24, 26.39it/s]global step 480000, trans_decision ep_re 28.69815873907346

{"global_step": 480000, "eval_re": [12.87981140881433, 14.753019741180397, 
112.5763214673392, 34.357541958377524, 9.230171215560187, 10.085494884103596, 
6.984833600276421, 10.138613185683651, 7.171506240416979, 68.8042736889823], 
"eval_len": [16, 23, 67, 60, 13, 12, 9, 17, 13, 72]}

 49%|████▉     | 489998/1000000 [6:47:41<5:18:34, 26.68it/s]global step 490000, trans_decision ep_re 23.470172667941377

{"global_step": 490000, "eval_re": [24.003275409705747, 11.321188015720486, 
9.898562731478302, 109.7299802936127, 18.94937241772293, 15.302107596014674, 
9.655870150581872, 16.81171205608693, 10.650429292238774, 8.379228716251331], 
"eval_len": [26, 13, 14, 86, 24, 24, 23, 19, 14, 11]}

 50%|████▉     | 499997/1000000 [6:56:03<5:17:22, 26.26it/s]global step 500000, trans_decision ep_re 39.290396724606936

{"global_step": 500000, "eval_re": [11.70925006580012, 161.8769740886324, 
38.05527359412499, 9.880889686511438, 10.486787165988925, 64.6465777755785, 
7.662046625378611, 9.694860787069615, 11.502600323935129, 67.38870713304955], 
"eval_len": [16, 102, 63, 18, 14, 38, 19, 11, 14, 75]}

 51%|█████     | 509999/1000000 [7:04:25<5:10:31, 26.30it/s]global step 510000, trans_decision ep_re 13.190295156439115

{"global_step": 510000, "eval_re": [5.763942242795147, 12.398597151501594, 
10.567180597876431, 11.90114797312785, 19.048205593601264, 11.518084674647437, 
12.331449496019419, 18.5785724149132, 13.67978307585369, 16.115988344055115], 
"eval_len": [9, 23, 14, 22, 21, 22, 15, 18, 20, 22]}

 52%|█████▏    | 519998/1000000 [7:13:00<5:00:30, 26.62it/s]global step 520000, trans_decision ep_re 41.132293357075824

{"global_step": 520000, "eval_re": [22.3309086973779, 11.260734475269684, 
12.406634528618566, 11.754702906987026, 89.98191133721443, 44.657468488631686, 
15.972114522304972, 178.38740720525004, 11.151026741084145, 13.420024668019824],
"eval_len": [19, 20, 16, 23, 72, 62, 19, 134, 27, 13]}

 53%|█████▎    | 529999/1000000 [7:21:20<4:57:17, 26.35it/s]global step 530000, trans_decision ep_re 39.74302254478558

{"global_step": 530000, "eval_re": [97.62919027437378, 6.723053527746973, 
10.760834017188385, 8.85859755706479, 10.155452379327949, 9.626690568601465, 
173.06826612199123, 59.077664583925824, 12.599094334612948, 8.931382083022484], 
"eval_len": [82, 16, 19, 13, 14, 12, 99, 91, 20, 12]}

 54%|█████▍    | 539999/1000000 [7:29:31<4:49:40, 26.47it/s]global step 540000, trans_decision ep_re 46.18459552606176

{"global_step": 540000, "eval_re": [72.88475965753207, 21.36703778789319, 
13.257767934281848, 203.89075755810558, 53.18206949563048, 32.02013079039824, 
11.343206008450313, 8.968158064934842, 10.910270329230572, 34.02179763416055], 
"eval_len": [85, 21, 20, 131, 54, 40, 39, 15, 15, 27]}

 55%|█████▍    | 549998/1000000 [7:37:53<4:42:16, 26.57it/s]global step 550000, trans_decision ep_re 17.744048533014755

{"global_step": 550000, "eval_re": [13.86658971700171, 6.825916438778445, 
19.30441642848128, 19.67930937348232, 17.816955191691445, 16.030958949894913, 
12.924043358084262, 16.00403407859474, 20.60194450346863, 34.386317290669794], 
"eval_len": [19, 12, 21, 19, 18, 16, 20, 22, 19, 29]}

 56%|█████▌    | 559997/1000000 [7:46:13<4:38:47, 26.30it/s]global step 560000, trans_decision ep_re 18.762312275615166

{"global_step": 560000, "eval_re": [12.180544814401333, 7.3707357881253595, 
10.000266122811363, 65.78779021680049, 31.375706013121544, 14.294079348939377, 
8.755279209299097, 18.28155438894862, 7.5982673389428275, 11.978899514761643], 
"eval_len": [18, 11, 18, 49, 27, 16, 11, 18, 11, 15]}

 57%|█████▋    | 569999/1000000 [7:54:34<4:31:30, 26.40it/s]global step 570000, trans_decision ep_re 49.05118201156201

{"global_step": 570000, "eval_re": [22.396666301780257, 20.50983926408413, 
72.99593433584322, 10.866502672368776, 25.838168627809658, 132.42359998313387, 
24.827815633636646, 24.174697770110665, 11.776624424817818, 144.70197110203512],
"eval_len": [23, 21, 74, 20, 50, 86, 30, 30, 14, 81]}

 58%|█████▊    | 579998/1000000 [8:02:55<4:21:55, 26.73it/s]global step 580000, trans_decision ep_re 24.135847572012786

{"global_step": 580000, "eval_re": [78.11627284759749, 10.375451486307897, 
11.536591130047762, 7.899633882606948, 71.22816923803916, 14.409209517889781, 
7.478781289761392, 21.56520270735299, 8.39599297995008, 10.353170640574346], 
"eval_len": [79, 14, 13, 15, 55, 17, 11, 21, 22, 13]}

 59%|█████▉    | 589997/1000000 [8:11:15<4:18:11, 26.47it/s]global step 590000, trans_decision ep_re 15.999146444285222

{"global_step": 590000, "eval_re": [15.375736656651915, 20.707296792011284, 
10.694981207498627, 13.576405540134292, 20.447359993460985, 23.326427152525156, 
14.085358435712271, 16.35775159986033, 13.742629770697253, 11.677517294300088], 
"eval_len": [18, 24, 14, 20, 22, 22, 22, 24, 15, 17]}

 60%|█████▉    | 599999/1000000 [8:19:35<4:10:30, 26.61it/s]global step 600000, trans_decision ep_re 37.81838165079294

{"global_step": 600000, "eval_re": [13.577465710817295, 73.52458349856711, 
104.21462406115532, 8.874275583327051, 93.45250261485288, 31.641920128826133, 
11.269484482906227, 11.002064699090537, 12.298968762792903, 18.327926965593864],
"eval_len": [15, 71, 79, 12, 74, 28, 14, 12, 16, 27]}

 61%|██████    | 609998/1000000 [8:27:56<4:03:05, 26.74it/s]global step 610000, trans_decision ep_re 32.177064834086615

{"global_step": 610000, "eval_re": [13.67125510707704, 8.579693151572464, 
66.18301277030568, 9.725314535637786, 24.67766959914344, 94.87349416917047, 
6.0989309947271995, 67.2945288872857, 9.779060400552337, 20.887688725394046], 
"eval_len": [21, 14, 64, 13, 23, 80, 9, 43, 11, 29]}

 62%|██████▏   | 619997/1000000 [8:36:30<4:00:45, 26.31it/s]global step 620000, trans_decision ep_re 63.29641141893379

{"global_step": 620000, "eval_re": [100.21390126214406, 65.01545188042998, 
20.559449376924803, 30.8564911979938, 261.30667842884196, 8.607346756171571, 
96.53037491999692, 18.20796130330179, 20.004667723567152, 11.661791339965765], 
"eval_len": [58, 41, 27, 26, 126, 18, 93, 25, 22, 25]}

 63%|██████▎   | 629999/1000000 [8:44:50<3:53:37, 26.40it/s]global step 630000, trans_decision ep_re 15.97868533099178

{"global_step": 630000, "eval_re": [13.8451783420822, 14.443583666724734, 
7.371365846031205, 14.009494661025807, 32.9743917268539, 14.441538683194597, 
23.520774766284802, 15.590445397287763, 10.433783012919202, 13.156297207513585],
"eval_len": [15, 19, 11, 24, 27, 17, 30, 18, 17, 18]}

 64%|██████▍   | 639999/1000000 [8:53:01<3:47:56, 26.32it/s]global step 640000, trans_decision ep_re 29.29289653131098

{"global_step": 640000, "eval_re": [69.7617087126832, 34.453117884514995, 
13.640741052712237, 11.946195722741884, 11.412963002692841, 19.02675819655352, 
22.706084348573455, 24.448285161117823, 69.65477458228443, 15.878336649235441], 
"eval_len": [48, 31, 16, 16, 17, 20, 26, 25, 55, 29]}

 65%|██████▍   | 649998/1000000 [9:01:22<3:37:26, 26.83it/s]global step 650000, trans_decision ep_re 51.499063151619666

{"global_step": 650000, "eval_re": [98.03400925478445, 10.190027720495278, 
15.097780986497714, 120.27881137069764, 19.36412565986128, 11.214379673271631, 
10.843966028897157, 18.898684222726153, 198.1755636865676, 12.893282912397781], 
"eval_len": [75, 12, 28, 80, 18, 17, 13, 24, 187, 21]}

 66%|██████▌   | 659997/1000000 [9:09:43<3:35:05, 26.35it/s]global step 660000, trans_decision ep_re 57.042810419117345

{"global_step": 660000, "eval_re": [103.05142190213071, 9.527604861413504, 
14.184524637616677, 13.685387439428267, 8.268015138807518, 80.6888851413068, 
99.92702819256316, 14.797871413084065, 213.4974493236861, 12.799916141136533], 
"eval_len": [97, 18, 20, 23, 13, 55, 92, 20, 237, 20]}

 67%|██████▋   | 669999/1000000 [9:18:20<3:29:21, 26.27it/s]global step 670000, trans_decision ep_re 17.632584034519184

{"global_step": 670000, "eval_re": [14.06655826216791, 31.270079840722453, 
12.144176839445462, 27.449582341785174, 16.83214725010854, 11.675782588120349, 
12.430309455676145, 11.59958154953781, 11.000077805923807, 27.857544411704186], 
"eval_len": [16, 25, 19, 24, 26, 19, 16, 17, 14, 66]}

 68%|██████▊   | 679999/1000000 [9:26:40<3:21:33, 26.46it/s]global step 680000, trans_decision ep_re 39.033367216686585

{"global_step": 680000, "eval_re": [12.239545776606676, 12.106785080593577, 
19.462940931902494, 8.519786375443312, 108.46261097677836, 13.66719233116729, 
113.27593173832088, 21.77807441261067, 69.81713248510036, 11.003672058342335], 
"eval_len": [25, 13, 21, 14, 91, 16, 69, 24, 42, 12]}

 69%|██████▉   | 689999/1000000 [9:35:00<3:14:57, 26.50it/s]global step 690000, trans_decision ep_re 14.341588751402076

{"global_step": 690000, "eval_re": [10.679489482228924, 28.366071963817184, 
13.518986482521242, 13.925348676480052, 9.713050546415253, 26.352538655305956, 
10.629745839877158, 11.048464895496679, 10.558529537572033, 8.623661434306293], 
"eval_len": [13, 25, 19, 27, 14, 45, 15, 17, 15, 15]}

 70%|██████▉   | 699999/1000000 [9:43:10<3:10:01, 26.31it/s]global step 700000, trans_decision ep_re 20.15706922260566

{"global_step": 700000, "eval_re": [11.726634837539336, 13.80123832921652, 
11.035952785463914, 9.743902593191299, 26.055248271726374, 21.025906719084833, 
76.54214968929064, 10.499534933775577, 12.82957629021839, 8.310547776549702], 
"eval_len": [20, 14, 15, 14, 29, 20, 44, 12, 19, 14]}

 71%|███████   | 709998/1000000 [9:51:31<2:59:40, 26.90it/s]global step 710000, trans_decision ep_re 42.020455509364034

{"global_step": 710000, "eval_re": [116.38142152718036, 124.90073995581966, 
15.650822861507926, 32.68200992419454, 10.058771408196105, 20.710666265933217, 
9.804537844268323, 13.995973599016976, 68.37753854431523, 7.642073163207988], 
"eval_len": [70, 92, 19, 29, 17, 26, 16, 16, 49, 19]}

 72%|███████▏  | 719997/1000000 [9:59:52<2:56:53, 26.38it/s]global step 720000, trans_decision ep_re 71.3057669542726

{"global_step": 720000, "eval_re": [22.23770030536911, 13.930679622136688, 
17.11397476356814, 231.8702718757352, 282.78988062085034, 8.055478153688743, 
16.656672456773208, 11.036062326333877, 10.769691541170266, 98.59725787710042], 
"eval_len": [25, 16, 45, 112, 247, 13, 29, 18, 16, 91]}

 73%|███████▎  | 729999/1000000 [10:08:15<2:49:42, 26.52it/s]global step 730000, trans_decision ep_re 31.924873385824675

{"global_step": 730000, "eval_re": [19.905693402220297, 12.41940424287101, 
29.35124197795517, 39.858135537085346, 22.17732954195964, 11.025870912201018, 
131.64928418998957, 21.070590284338042, 20.590951214332122, 11.200232555294544],
"eval_len": [24, 16, 27, 37, 22, 14, 80, 19, 47, 15]}

 74%|███████▍  | 739998/1000000 [10:16:50<2:42:52, 26.61it/s]global step 740000, trans_decision ep_re 59.15103505629893

{"global_step": 740000, "eval_re": [9.811284838662216, 18.47181448269257, 
10.38631949504342, 168.15386468347427, 16.233857337262265, 161.22183303650525, 
91.37615742918777, 15.95956141757287, 88.27855790511032, 11.617099937478345], 
"eval_len": [13, 23, 16, 126, 21, 93, 99, 21, 77, 15]}

 75%|███████▍  | 749999/1000000 [10:25:00<2:38:25, 26.30it/s]global step 750000, trans_decision ep_re 21.047138915349

{"global_step": 750000, "eval_re": [12.468819356305621, 14.006814633819163, 
11.420848126628764, 13.254345729668476, 14.52617395956037, 12.392261078794586, 
9.28142981181915, 83.92282692047752, 8.340266695201683, 30.857602841214657], 
"eval_len": [27, 18, 17, 20, 19, 16, 14, 59, 12, 26]}

 76%|███████▌  | 759998/1000000 [10:33:23<2:29:38, 26.73it/s]global step 760000, trans_decision ep_re 26.904580275745065

{"global_step": 760000, "eval_re": [18.300437100208928, 12.202952797795207, 
145.59451727599009, 11.788874656590778, 8.770275218675362, 21.967714041505634, 
6.217280383887621, 11.101853663618476, 22.0244529862648, 11.077444632913764], 
"eval_len": [20, 19, 106, 14, 11, 27, 18, 19, 24, 13]}

 77%|███████▋  | 769997/1000000 [10:41:45<2:25:46, 26.30it/s]global step 770000, trans_decision ep_re 24.367181569415337

{"global_step": 770000, "eval_re": [27.40481556321091, 7.817105571047464, 
18.53858876565342, 12.20252608373539, 110.54643966301515, 13.733156510673071, 
8.816544301855517, 10.364202139593287, 17.621060578811655, 16.62737651655748], 
"eval_len": [27, 11, 25, 25, 67, 16, 15, 13, 21, 17]}

 78%|███████▊  | 779999/1000000 [10:50:20<2:20:41, 26.06it/s]global step 780000, trans_decision ep_re 34.81265475727769

{"global_step": 780000, "eval_re": [10.059286287152132, 22.538566897498733, 
123.21810851286007, 18.71191957787433, 9.150179778818467, 98.21578580848224, 
14.728430712951653, 18.93856096060359, 21.5943584239599, 10.971350612575879], 
"eval_len": [17, 25, 71, 23, 13, 84, 23, 19, 22, 16]}

 79%|███████▉  | 789998/1000000 [10:58:35<2:12:01, 26.51it/s]global step 790000, trans_decision ep_re 35.72324191216855

{"global_step": 790000, "eval_re": [15.033141496457835, 24.733308705922454, 
69.71570199079113, 92.51800153876576, 93.38041201210679, 11.466414882605015, 
11.702954343919464, 15.100547640682818, 12.20762519280764, 11.374311317626573], 
"eval_len": [29, 22, 51, 83, 65, 15, 23, 18, 14, 23]}

 80%|███████▉  | 799999/1000000 [11:07:10<2:07:04, 26.23it/s]global step 800000, trans_decision ep_re 21.871056977753035

{"global_step": 800000, "eval_re": [10.926452470328115, 9.614031069671626, 
27.3837202588957, 60.30570365245389, 15.465334416375311, 19.022092632123613, 
13.498519506200253, 14.649918022694925, 20.324033586328188, 27.520764162458722],
"eval_len": [19, 21, 26, 85, 20, 26, 16, 20, 20, 25]}

 81%|████████  | 809997/1000000 [11:15:24<2:02:27, 25.86it/s]global step 810000, trans_decision ep_re 22.60980284883636

{"global_step": 810000, "eval_re": [16.19558318940459, 17.55871594758706, 
17.575367721840987, 14.704030266933906, 23.546086914354266, 22.547525099779136, 
7.043029977849085, 19.78186692721042, 78.10950243078902, 9.036320012615148], 
"eval_len": [20, 20, 18, 16, 27, 25, 12, 23, 56, 12]}

 82%|████████▏ | 819997/1000000 [11:23:52<1:54:44, 26.15it/s]global step 820000, trans_decision ep_re 20.601875718168255

{"global_step": 820000, "eval_re": [85.09860335948756, 7.927687168899728, 
16.905870435154547, 14.412520783434825, 7.669915609814525, 19.56483219763161, 
7.2682022498402725, 8.262727278426665, 9.902800697922931, 29.0055974010699], 
"eval_len": [92, 15, 24, 27, 12, 27, 10, 15, 14, 24]}

 83%|████████▎ | 829998/1000000 [11:32:21<1:47:28, 26.36it/s]global step 830000, trans_decision ep_re 33.12275698603689

{"global_step": 830000, "eval_re": [7.936590513716191, 92.97186394669599, 
15.42802033423328, 8.728806657564316, 14.063655469191728, 71.03961114261188, 
7.923473295849378, 16.9624393719173, 58.372130070414045, 37.80097905817471], 
"eval_len": [22, 80, 18, 14, 15, 66, 11, 22, 38, 36]}

 84%|████████▍ | 839999/1000000 [11:41:00<1:42:37, 25.99it/s]global step 840000, trans_decision ep_re 36.743486404368625

{"global_step": 840000, "eval_re": [36.375416224809776, 12.171522108913829, 
92.74586630482676, 154.71516843329712, 8.851283932196832, 10.019897423834749, 
10.714088643539336, 9.833287328247042, 10.815388037451365, 21.19294560656949], 
"eval_len": [43, 17, 68, 77, 16, 17, 15, 26, 21, 24]}

 85%|████████▍ | 849999/1000000 [11:49:16<1:36:50, 25.82it/s]global step 850000, trans_decision ep_re 51.5689936422318

{"global_step": 850000, "eval_re": [137.7643726694429, 13.468131942464455, 
12.909925484849664, 14.416735074522418, 159.8451938905465, 16.04967590097395, 
10.03730150288917, 18.14421835836788, 119.18234198386848, 13.872039614392678], 
"eval_len": [88, 19, 18, 22, 94, 26, 13, 22, 84, 18]}

 86%|████████▌ | 859999/1000000 [11:57:44<1:28:53, 26.25it/s]global step 860000, trans_decision ep_re 18.198741481343202

{"global_step": 860000, "eval_re": [10.849417586905824, 7.08734245166193, 
20.43031572941175, 14.419882483878906, 10.463086662337309, 62.200654467025515, 
11.378010377112538, 19.678066117340563, 15.466775851056589, 10.013863086701127],
"eval_len": [14, 11, 22, 23, 16, 59, 16, 19, 16, 16]}

 87%|████████▋ | 869998/1000000 [12:06:11<1:21:55, 26.45it/s]global step 870000, trans_decision ep_re 28.550536591762835

{"global_step": 870000, "eval_re": [124.73667276459867, 11.820002269953413, 
12.740976094621354, 19.223171458032233, 65.3252153012493, 9.732030985832333, 
12.780550021067276, 7.462754282834171, 10.868022837799213, 10.815969901640358], 
"eval_len": [75, 17, 23, 23, 40, 12, 14, 13, 14, 16]}

 88%|████████▊ | 879999/1000000 [12:14:50<1:16:49, 26.03it/s]global step 880000, trans_decision ep_re 13.758334218155625

{"global_step": 880000, "eval_re": [14.225490218134487, 12.348111036880413, 
13.795758520742307, 11.861210826320649, 26.89730517868189, 6.471781127946824, 
12.733167681737983, 17.057677635012105, 13.126293792680302, 9.066546163419266], 
"eval_len": [23, 15, 17, 21, 27, 11, 14, 21, 18, 16]}

 89%|████████▉ | 889999/1000000 [12:23:20<1:10:02, 26.18it/s]global step 890000, trans_decision ep_re 31.476167468691592

{"global_step": 890000, "eval_re": [10.713061354364264, 74.66722593876003, 
6.542490670391882, 66.76432005496247, 9.127420184253479, 8.232479297759992, 
32.45976048630404, 75.11322400494886, 16.1277548780468, 15.013937817124113], 
"eval_len": [14, 59, 10, 40, 11, 11, 27, 81, 16, 17]}

 90%|████████▉ | 899999/1000000 [12:31:50<1:03:55, 26.08it/s]global step 900000, trans_decision ep_re 69.03527604086061

{"global_step": 900000, "eval_re": [10.172605318806458, 420.6085043563424, 
19.926842046644666, 19.629475741406555, 9.78115942572236, 13.065125070097055, 
10.294740148277265, 84.19089121852089, 90.81344668098478, 11.869970401803705], 
"eval_len": [15, 174, 23, 27, 12, 24, 22, 65, 63, 14]}

 91%|█████████ | 909999/1000000 [12:40:04<57:22, 26.14it/s]global step 910000, trans_decision ep_re 26.769799244395898

{"global_step": 910000, "eval_re": [13.172922616846805, 48.61928142117328, 
127.70316761633505, 10.237766721799398, 7.378266972921023, 10.910189322417601, 
15.832741269879547, 15.215152520098943, 9.904252732126217, 8.724251250361117], 
"eval_len": [16, 55, 78, 18, 10, 13, 28, 20, 11, 13]}

 92%|█████████▏| 919997/1000000 [12:48:32<51:41, 25.79it/s]global step 920000, trans_decision ep_re 24.009165373015506

{"global_step": 920000, "eval_re": [72.09506304146146, 71.38768932548477, 
7.5066238776146434, 25.24210434897638, 9.500842726705475, 9.950260368471163, 
11.24115340874529, 10.358934640067332, 11.89259370930426, 10.91638828332429], 
"eval_len": [69, 48, 27, 47, 13, 26, 21, 13, 15, 19]}

 93%|█████████▎| 929999/1000000 [12:57:10<45:05, 25.87it/s]global step 930000, trans_decision ep_re 39.60884024768582

{"global_step": 930000, "eval_re": [95.3140079389734, 11.174275455698965, 
8.771421608984426, 76.7374701191173, 26.310812479689314, 14.347232523732478, 
19.488903415494608, 118.75316470225498, 9.790710504894498, 15.400403728018176], 
"eval_len": [78, 15, 11, 60, 46, 17, 24, 76, 15, 17]}

 94%|█████████▍| 939999/1000000 [13:05:40<38:53, 25.71it/s]global step 940000, trans_decision ep_re 20.159440662279557

{"global_step": 940000, "eval_re": [24.08630662199536, 78.07019348679692, 
9.470866305970517, 7.034241547003426, 16.29822646795101, 11.012350212998927, 
13.07792415206415, 18.277528502346325, 14.318869216228771, 9.947900109440154], 
"eval_len": [24, 54, 20, 11, 18, 14, 17, 20, 21, 17]}

 95%|█████████▍| 949999/1000000 [13:14:10<32:12, 25.87it/s]global step 950000, trans_decision ep_re 45.78851949803679

{"global_step": 950000, "eval_re": [9.534278890594148, 19.81148153028503, 
20.339657704277826, 127.3748680051314, 98.04420118438458, 68.49654148348829, 
18.933433140960144, 9.80033243753502, 73.27317590210575, 12.277224701605709], 
"eval_len": [14, 19, 19, 65, 86, 51, 20, 12, 61, 20]}

 96%|█████████▌| 959999/1000000 [13:22:35<25:45, 25.88it/s]global step 960000, trans_decision ep_re 24.7659081985575

{"global_step": 960000, "eval_re": [69.79847808988144, 7.6376932506831325, 
6.205346547021161, 8.068227136054155, 14.699636701989776, 9.284611446157221, 
10.550928591849758, 90.8377078309292, 14.201221296826917, 16.375231094182226], 
"eval_len": [49, 26, 8, 11, 15, 21, 15, 63, 20, 27]}

 97%|█████████▋| 969997/1000000 [13:31:06<19:18, 25.89it/s]global step 970000, trans_decision ep_re 31.156406189950008

{"global_step": 970000, "eval_re": [11.484222091564837, 10.24791664833317, 
10.06054084980827, 29.10435496341028, 10.303328033662945, 6.079047895133317, 
7.079253497867034, 86.96029456643835, 67.73024186635165, 72.51486148693024], 
"eval_len": [15, 16, 22, 25, 12, 17, 15, 70, 45, 87]}

 98%|█████████▊| 979997/1000000 [13:39:35<12:48, 26.04it/s]global step 980000, trans_decision ep_re 12.547277659406806

{"global_step": 980000, "eval_re": [10.45911339214139, 14.06804054726491, 
14.993833648955535, 10.780741561057303, 15.994658707565899, 9.608926002037583, 
9.087238078766907, 8.236154283657307, 16.668466855750168, 15.575603516871052], 
"eval_len": [15, 15, 18, 18, 18, 15, 20, 11, 22, 16]}

 99%|█████████▉| 989997/1000000 [13:48:05<06:23, 26.12it/s]global step 990000, trans_decision ep_re 66.7405035948519

{"global_step": 990000, "eval_re": [169.87798946031216, 18.835974427065867, 
15.28367442452332, 20.496526589774422, 94.12159680524267, 102.4427530069675, 
207.82859328256765, 17.455420989782382, 10.336532925369754, 10.7259740369133], 
"eval_len": [122, 22, 19, 25, 84, 82, 167, 29, 17, 15]}

100%|█████████▉| 999999/1000000 [13:56:37<00:00, 25.37it/s]global step 1000000, trans_decision ep_re 31.22615544992659

{"global_step": 1000000, "eval_re": [9.758997583911505, 20.13367755112161, 
6.518348441799995, 32.040751185149084, 19.01446744566499, 108.74422156003823, 
67.76475029284018, 20.71058144548577, 11.12847593860124, 16.4472830546533], 
"eval_len": [13, 27, 13, 29, 19, 66, 54, 23, 22, 19]}

100%|██████████| 1000000/1000000 [13:56:50<00:00, 19.92it/s]
