
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.1
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [05:40<13:12:06, 20.83it/s]global step 10000, trans_decision ep_re 29.801708552861903

{"global_step": 10000, "eval_re": [18.15305847935609, 26.97189991929164, 
15.343714038396845, 91.62880217556453, 11.870328651652112, 42.10758433692732, 
25.4879553976771, 7.0230951823828915, 27.286187101331862, 32.144460246038605], 
"eval_len": [40, 39, 40, 89, 33, 43, 41, 24, 46, 45]}

  2%|▏         | 19999/1000000 [16:01<12:53:01, 21.13it/s]global step 20000, trans_decision ep_re 34.29978065907771

{"global_step": 20000, "eval_re": [28.177821326369408, 8.027770646606786, 
164.70631128635526, 22.72638725524732, 16.56336649435684, 21.94361786135556, 
25.620324449732205, 22.570688691076583, 16.32210376220954, 16.339414817467492], 
"eval_len": [51, 21, 126, 41, 38, 34, 49, 33, 29, 27]}

  3%|▎         | 29999/1000000 [26:50<12:48:24, 21.04it/s]global step 30000, trans_decision ep_re 32.933581928453584

{"global_step": 30000, "eval_re": [185.94199028461284, 15.665336651123612, 
18.444013515485953, 7.323692928553917, 4.918282296153791, 29.944550882804723, 
22.97902619122031, 14.641808363871169, 15.747206073520166, 13.729912097189368], 
"eval_len": [242, 32, 27, 22, 19, 40, 36, 34, 29, 36]}

  4%|▍         | 39998/1000000 [37:30<12:28:54, 21.36it/s]global step 40000, trans_decision ep_re 17.31566562943957

{"global_step": 40000, "eval_re": [11.63751414889264, 16.712168525739045, 
23.04051831052812, 12.501851022841587, 37.22771844639797, 16.572435099697678, 
20.240431007962226, 6.910217746815062, 19.18754151854683, 9.126260466974536], 
"eval_len": [32, 28, 35, 34, 41, 28, 37, 19, 32, 23]}

  5%|▍         | 49998/1000000 [48:10<12:31:50, 21.06it/s]global step 50000, trans_decision ep_re 17.087636636784044

{"global_step": 50000, "eval_re": [15.25984248537636, 15.968393741813419, 
12.039642701507965, 19.932958630856714, 14.746681872711013, 27.35252012682393, 
9.09681823322476, 25.31124544649086, 9.082152862915143, 22.086110266120258], 
"eval_len": [25, 27, 28, 34, 28, 36, 19, 38, 34, 33]}

  6%|▌         | 59998/1000000 [58:50<12:10:15, 21.45it/s]global step 60000, trans_decision ep_re 79.5445702640678

{"global_step": 60000, "eval_re": [12.549676378408325, 15.471052240680699, 
17.265765221190648, 27.84906224055625, 242.1611055279284, 31.05476811575448, 
13.945589055376782, 402.8873987936928, 10.488097106151992, 21.773187960937644], 
"eval_len": [29, 30, 38, 38, 196, 36, 23, 200, 20, 32]}

  7%|▋         | 69999/1000000 [1:09:10<12:10:10, 21.23it/s]global step 70000, trans_decision ep_re 44.03151189854791

{"global_step": 70000, "eval_re": [11.480354614038337, 16.93866160234712, 
16.748658974334532, 7.45497109834321, 11.997332856054973, 25.91751062234204, 
12.744369184461396, 309.7097989031222, 7.947192556396886, 19.376268574038424], 
"eval_len": [21, 30, 35, 29, 24, 36, 25, 209, 30, 35]}

  8%|▊         | 79998/1000000 [1:20:00<11:57:50, 21.36it/s]global step 80000, trans_decision ep_re 13.848773018838276

{"global_step": 80000, "eval_re": [14.052093431511606, 6.457025883045003, 
4.142348865667929, 18.571183357644173, 19.024254447777253, 12.30235552489135, 
12.491651129172762, 11.37055663166215, 29.976566842203002, 10.099694074807527], 
"eval_len": [34, 21, 20, 29, 37, 34, 27, 31, 38, 22]}

  9%|▉         | 89998/1000000 [1:30:40<11:48:31, 21.41it/s]global step 90000, trans_decision ep_re 15.76246213217234

{"global_step": 90000, "eval_re": [14.35281088246979, 27.192148355960654, 
12.589396972992892, 14.561353911937415, 18.164407597594455, 24.36766517351403, 
7.4894720796918355, 7.421842352336064, 12.870814421655615, 18.61470957357065], 
"eval_len": [34, 37, 21, 35, 33, 38, 30, 21, 32, 26]}

 10%|▉         | 99999/1000000 [1:41:00<11:50:57, 21.10it/s]global step 100000, trans_decision ep_re 16.087082261753395

{"global_step": 100000, "eval_re": [25.24362533335877, 21.054268534356954, 
14.4494338761244, 11.581582517761447, 14.641112904967647, 13.430596065828748, 
16.075989881749944, 12.192812956604216, 15.515334528596533, 16.68606601818526], 
"eval_len": [35, 36, 25, 34, 34, 37, 31, 29, 37, 28]}

 11%|█         | 109998/1000000 [1:51:50<11:36:00, 21.31it/s]global step 110000, trans_decision ep_re 17.641356537791303

{"global_step": 110000, "eval_re": [22.771615132684698, 30.32761702830224, 
7.834188609537916, 16.54800648817425, 7.294263346963185, 19.16564342511114, 
23.685734539320677, 9.947548312594435, 14.671856641505997, 24.167091853718485], 
"eval_len": [37, 37, 23, 29, 19, 36, 35, 23, 36, 36]}

 12%|█▏        | 119999/1000000 [2:02:11<11:28:26, 21.30it/s]global step 120000, trans_decision ep_re 75.34607931268894

{"global_step": 120000, "eval_re": [19.95804884254639, 29.269092752892206, 
16.43393667564593, 17.606834744331522, 22.241051283726122, 17.980640926974818, 
15.377240454400887, 4.195719596662283, 25.19163555528156, 585.2065922944277], 
"eval_len": [37, 38, 34, 34, 35, 35, 34, 28, 40, 346]}

 13%|█▎        | 129999/1000000 [2:12:50<11:28:23, 21.06it/s]global step 130000, trans_decision ep_re 18.1544504760189

{"global_step": 130000, "eval_re": [26.984877005189908, 10.48183724087045, 
19.641840862132383, 23.122612376043893, 13.547332074536818, 24.720339693767404, 
11.12051249937238, 16.874775645754575, 16.61959184026123, 18.430785522259953], 
"eval_len": [37, 33, 34, 37, 27, 36, 20, 37, 31, 35]}

 14%|█▍        | 139998/1000000 [2:23:40<11:11:37, 21.34it/s]global step 140000, trans_decision ep_re 15.3673447027757

{"global_step": 140000, "eval_re": [26.31598462273316, 15.55946665444307, 
14.524786201341096, 11.355801424414015, 13.43115316106873, 6.094461374800632, 
13.729620465865203, 25.07426300790132, 17.845464978746993, 9.742445136442756], 
"eval_len": [36, 36, 36, 26, 35, 28, 25, 38, 37, 21]}

 15%|█▍        | 149999/1000000 [2:34:10<11:05:41, 21.28it/s]global step 150000, trans_decision ep_re 17.814505683141757

{"global_step": 150000, "eval_re": [20.497979272645892, 14.792295233611773, 
24.565676009108902, 20.51039895816479, 10.53863070053119, 13.339192835389229, 
14.84124795167882, 17.51890626080967, 22.960579345762746, 18.58015026371458], 
"eval_len": [33, 28, 38, 31, 20, 25, 25, 29, 34, 30]}

 16%|█▌        | 159998/1000000 [2:44:50<10:53:46, 21.41it/s]global step 160000, trans_decision ep_re 20.12435838271758

{"global_step": 160000, "eval_re": [14.453856419845476, 25.54974455132108, 
6.514527058324287, 19.564287717203563, 22.646254742415728, 29.02026950664941, 
11.02882480051173, 23.984870899381807, 23.956236597794057, 24.524711533728674], 
"eval_len": [31, 38, 17, 29, 37, 38, 22, 38, 40, 37]}

 17%|█▋        | 169998/1000000 [2:55:20<10:47:58, 21.35it/s]global step 170000, trans_decision ep_re 57.285350857940514

{"global_step": 170000, "eval_re": [19.86940053426985, 17.448193939106005, 
9.44808931618915, 7.470092635530074, 13.781142337245896, 13.625005620617655, 
17.45597255016962, 427.7414943365859, 23.720030564043462, 22.29408674564757], 
"eval_len": [35, 35, 24, 19, 32, 23, 36, 241, 34, 35]}

 18%|█▊        | 179999/1000000 [3:05:50<10:41:01, 21.32it/s]global step 180000, trans_decision ep_re 16.832761840009663

{"global_step": 180000, "eval_re": [34.10822206123812, 25.395664540500366, 
13.579108164030835, 23.969074511706708, 5.900071385991918, 13.111176623520821, 
14.664900068368727, 19.584711829688242, 13.640974684772031, 4.373714530278887], 
"eval_len": [40, 38, 35, 35, 31, 34, 24, 33, 25, 16]}

 19%|█▉        | 189999/1000000 [3:16:11<10:29:41, 21.44it/s]global step 190000, trans_decision ep_re 14.007324321813542

{"global_step": 190000, "eval_re": [17.54740917605608, 10.594153524026572, 
20.037182387969185, 14.43235690483813, 10.386633889563946, 20.323736283959565, 
16.634640385867385, 8.751672779076554, 6.530068593067707, 14.835389293710296], 
"eval_len": [30, 35, 31, 24, 21, 32, 34, 26, 22, 28]}

 20%|█▉        | 199999/1000000 [3:26:42<10:22:55, 21.40it/s]global step 200000, trans_decision ep_re 15.288568613415208

{"global_step": 200000, "eval_re": [11.319873141236599, 20.90813232778777, 
7.685979973506481, 21.82366661624882, 10.080395132957447, 6.801863326839799, 
15.649697866265575, 16.646748471815858, 15.957414394741532, 26.011914882752194],
"eval_len": [24, 37, 18, 36, 21, 18, 28, 35, 27, 37]}

 21%|██        | 209999/1000000 [3:37:30<10:17:29, 21.32it/s]global step 210000, trans_decision ep_re 15.511421101574893

{"global_step": 210000, "eval_re": [11.231677548788422, 3.096437005978323, 
15.443854458795299, 8.445310380159771, 29.02601018158185, 15.856979491971293, 
11.590522516289441, 19.117364360272724, 17.644365457065387, 23.66168961484645], 
"eval_len": [20, 23, 36, 23, 39, 35, 35, 34, 36, 36]}

 22%|██▏       | 219998/1000000 [3:48:00<10:04:10, 21.52it/s]global step 220000, trans_decision ep_re 18.592110413187324

{"global_step": 220000, "eval_re": [24.0445700085953, 28.812448213968064, 
16.066535416433496, 10.633950544047247, 21.758297766766, 15.75605587382233, 
18.237139011413063, 14.534556390722786, 26.405290092982405, 9.672260813122552], 
"eval_len": [35, 39, 33, 21, 35, 27, 39, 33, 37, 22]}

 23%|██▎       | 229999/1000000 [3:58:30<10:02:33, 21.30it/s]global step 230000, trans_decision ep_re 40.58744212079272

{"global_step": 230000, "eval_re": [253.77859981609126, 21.927073826033475, 
13.15910656682484, 9.470043653183723, 28.749236644451543, 16.33140274392446, 
19.826483290848888, 5.400960562812751, 24.808493516180224, 12.423020587576067], 
"eval_len": [138, 38, 29, 23, 36, 34, 35, 21, 35, 27]}

 24%|██▍       | 239998/1000000 [4:09:00<9:44:07, 21.69it/s]global step 240000, trans_decision ep_re 14.650014519220495

{"global_step": 240000, "eval_re": [6.5533364593274435, 28.528944767379517, 
11.59122319820798, 14.261119929418784, 9.490832845570475, 7.187643046166677, 
16.38671547059623, 22.67190587451817, 14.751330831645582, 15.077092769374074], 
"eval_len": [19, 40, 37, 28, 30, 24, 35, 33, 29, 29]}

 25%|██▍       | 249999/1000000 [4:19:12<9:40:49, 21.52it/s]global step 250000, trans_decision ep_re 19.579383523455608

{"global_step": 250000, "eval_re": [25.967672124898385, 20.41356292635325, 
12.77719484643312, 31.44054280832647, 19.156430400884606, 14.646938024832199, 
19.203317326358658, 11.585840650395568, 24.950381552020662, 15.651954574053116],
"eval_len": [37, 33, 35, 39, 30, 34, 27, 27, 34, 36]}

 26%|██▌       | 259999/1000000 [4:29:50<9:34:06, 21.48it/s]global step 260000, trans_decision ep_re 18.404366346142112

{"global_step": 260000, "eval_re": [8.065081577799315, 27.967279634080995, 
19.193208554946192, 15.117238072876923, 20.450162189989943, 16.969909971145317, 
30.038608377475647, 10.510512005354197, 16.317091634130474, 19.414571443622126],
"eval_len": [26, 37, 28, 28, 37, 27, 38, 34, 26, 30]}

 27%|██▋       | 269997/1000000 [4:40:20<9:23:39, 21.58it/s]global step 270000, trans_decision ep_re 62.64602407031141

{"global_step": 270000, "eval_re": [9.24387527774098, 12.03490143588907, 
299.972423870662, 12.647885689633622, 12.455563712940533, 13.117458415913207, 
7.681775869338674, 10.82448735723814, 10.197907075656772, 238.28396199810112], 
"eval_len": [20, 33, 175, 24, 35, 27, 16, 22, 22, 152]}

 28%|██▊       | 279998/1000000 [4:50:50<9:12:54, 21.70it/s]global step 280000, trans_decision ep_re 42.97950808647542

{"global_step": 280000, "eval_re": [261.3394188336987, 24.61489906707894, 
22.973346604331283, 11.758855611505515, 12.898993608358289, 14.304078665956117, 
22.87798369514862, 22.820619964497993, 17.41727885905498, 18.789605955123726], 
"eval_len": [156, 40, 35, 30, 31, 24, 38, 35, 31, 34]}

 29%|██▉       | 289999/1000000 [5:01:01<9:13:01, 21.40it/s]global step 290000, trans_decision ep_re 27.829244995274742

{"global_step": 290000, "eval_re": [19.805345222402227, 8.05036789652227, 
20.817343908840048, 15.92536517008524, 17.182118110493505, 153.71935800741377, 
13.998962821771176, 13.72513992358374, 5.987646687554585, 9.080802204080832], 
"eval_len": [35, 22, 35, 27, 33, 86, 35, 36, 19, 30]}

 30%|██▉       | 299999/1000000 [5:11:40<9:02:50, 21.49it/s]global step 300000, trans_decision ep_re 17.762943682084988

{"global_step": 300000, "eval_re": [26.0960265661832, 10.901547211871483, 
6.571841269782133, 26.84809958425964, 20.23152338600767, 22.572874489006686, 
19.298457556064545, 16.348423138145765, 19.728374612973766, 9.03226900655499], 
"eval_len": [37, 24, 29, 41, 36, 34, 35, 30, 36, 23]}

 31%|███       | 309997/1000000 [5:22:10<8:56:57, 21.42it/s]global step 310000, trans_decision ep_re 17.419145912285522

{"global_step": 310000, "eval_re": [16.130595578840254, 10.738329896646784, 
15.1814148081452, 8.025682316606467, 19.55837704304903, 14.9569319275951, 
20.482457425006697, 26.63785302906709, 21.64475809817029, 20.83505899972833], 
"eval_len": [32, 25, 30, 22, 31, 25, 36, 36, 33, 37]}

 32%|███▏      | 319998/1000000 [5:32:40<8:45:23, 21.57it/s]global step 320000, trans_decision ep_re 17.972045555278534

{"global_step": 320000, "eval_re": [27.513847006247314, 18.49281760621119, 
25.104080256765627, 17.592332074083963, 9.757227683788914, 14.29544306016418, 
8.276786607190811, 14.895464717017136, 27.407729658209206, 16.384726883106964], 
"eval_len": [40, 32, 37, 36, 26, 23, 31, 35, 38, 30]}

 33%|███▎      | 329999/1000000 [5:42:51<8:41:10, 21.43it/s]global step 330000, trans_decision ep_re 15.712539827123909

{"global_step": 330000, "eval_re": [15.426185971675473, 10.02458685098336, 
8.836758719601773, 22.607607044572507, 11.872979871232864, 28.211775961992714, 
20.323956862393267, 14.030719214646075, 7.901960967933608, 17.888866806207453], 
"eval_len": [31, 20, 23, 37, 23, 40, 31, 25, 27, 35]}

 34%|███▍      | 339999/1000000 [5:53:30<8:32:45, 21.45it/s]global step 340000, trans_decision ep_re 13.99351628336144

{"global_step": 340000, "eval_re": [12.894122232690135, 11.09001109502992, 
6.982392601636531, 13.854078002623899, 10.100138431528377, 16.19586022122516, 
11.463030989871477, 13.75280805239119, 18.31281242301289, 25.289908783604847], 
"eval_len": [34, 35, 28, 25, 20, 29, 20, 36, 35, 39]}

 35%|███▍      | 349998/1000000 [6:04:00<8:19:06, 21.71it/s]global step 350000, trans_decision ep_re 16.98282805259641

{"global_step": 350000, "eval_re": [16.442401957554374, 11.427069124764174, 
17.55752517318252, 22.58678528463963, 20.13459913659806, 10.177609654597354, 
12.107383927186662, 26.480169367883672, 18.088697430244085, 14.826039469313537],
"eval_len": [32, 25, 26, 36, 37, 22, 34, 39, 36, 35]}

 36%|███▌      | 359999/1000000 [6:14:11<8:10:35, 21.74it/s]global step 360000, trans_decision ep_re 17.001962984204198

{"global_step": 360000, "eval_re": [9.716192684113105, 10.46817068694873, 
12.53653502616781, 21.739508983366026, 24.47007959643718, 13.216080812765545, 
19.795570166406026, 11.993538988800656, 25.56895367411373, 20.514999222923155], 
"eval_len": [21, 35, 24, 38, 36, 25, 38, 29, 39, 32]}

 37%|███▋      | 369999/1000000 [6:24:50<8:05:09, 21.64it/s]global step 370000, trans_decision ep_re 17.760082187805246

{"global_step": 370000, "eval_re": [26.427176521822396, 12.274460716738135, 
17.311018883570917, 20.025574274774915, 25.278402722189995, 14.383680023429307, 
10.71443460084413, 11.015932984699814, 16.739744126358392, 23.430397023624458], 
"eval_len": [37, 34, 31, 32, 37, 35, 23, 34, 29, 35]}

 38%|███▊      | 379998/1000000 [6:35:20<7:54:59, 21.75it/s]global step 380000, trans_decision ep_re 33.81324145501046

{"global_step": 380000, "eval_re": [18.379007644502842, 23.774702879566384, 
16.334534885440448, 21.399625913663435, 169.86249187726665, 30.25482698471975, 
20.49455862066399, 15.106514350122584, 8.456852036319077, 14.069299357839416], 
"eval_len": [28, 34, 34, 30, 94, 38, 36, 34, 22, 33]}

 39%|███▉      | 389998/1000000 [6:45:32<7:42:20, 21.99it/s]global step 390000, trans_decision ep_re 15.832559521677092

{"global_step": 390000, "eval_re": [18.489816366584126, 19.8940272114652, 
17.939992825587822, 17.725931073546132, 14.846397387253916, 10.900096903257612, 
18.49510652276028, 8.165435967534865, 16.33399818637874, 15.534792772402229], 
"eval_len": [27, 36, 37, 37, 28, 23, 34, 30, 36, 35]}

 40%|███▉      | 399999/1000000 [6:56:10<7:43:52, 21.56it/s]global step 400000, trans_decision ep_re 87.54964504260064

{"global_step": 400000, "eval_re": [15.033850812861813, 10.627360645343481, 
14.514475628433903, 12.84748828727479, 20.637734478637103, 733.4656523438218, 
15.460040074458455, 18.744529051137448, 20.68094415985809, 13.484374944179626], 
"eval_len": [28, 33, 28, 34, 36, 363, 28, 37, 40, 37]}

 41%|████      | 409997/1000000 [7:06:40<7:34:11, 21.65it/s]global step 410000, trans_decision ep_re 19.07075837291336

{"global_step": 410000, "eval_re": [24.634470461077157, 17.340115516978482, 
27.548337678288995, 27.986361764463734, 8.501964738116055, 16.531363960597112, 
17.480828338983255, 23.73156157811001, 11.68167223977089, 15.270907452747906], 
"eval_len": [36, 34, 38, 38, 21, 36, 35, 37, 35, 28]}

 42%|████▏     | 419999/1000000 [7:16:52<7:30:15, 21.47it/s]global step 420000, trans_decision ep_re 17.360305396011597

{"global_step": 420000, "eval_re": [28.653382046339615, 9.598899383585561, 
17.338611029828073, 15.384717733868495, 12.91876626505547, 19.112505022675574, 
23.373275727183483, 18.70259546986635, 12.128326853850849, 16.3919744278625], 
"eval_len": [38, 34, 30, 30, 35, 36, 36, 33, 37, 33]}

 43%|████▎     | 429999/1000000 [7:27:30<7:21:38, 21.51it/s]global step 430000, trans_decision ep_re 16.825191833682222

{"global_step": 430000, "eval_re": [28.33378337397405, 12.764665176255264, 
25.155590064834826, 31.058541583853867, 10.76969677807665, 12.505392875887917, 
14.00929289781505, 9.308869015398763, 9.273639875150085, 15.072446695575751], 
"eval_len": [41, 34, 36, 39, 30, 23, 27, 22, 23, 29]}

 44%|████▍     | 439998/1000000 [7:38:00<7:08:44, 21.77it/s]global step 440000, trans_decision ep_re 19.375387325724468

{"global_step": 440000, "eval_re": [23.87415342435536, 14.331147061582502, 
14.429408403627017, 9.692320192919135, 14.902472666049714, 27.26649752537674, 
22.429185605035798, 19.620462470731336, 22.378774794293417, 24.82945111327364], 
"eval_len": [37, 31, 27, 22, 36, 38, 38, 37, 34, 34]}

 45%|████▍     | 449999/1000000 [7:48:11<7:04:52, 21.57it/s]global step 450000, trans_decision ep_re 16.357463458940888

{"global_step": 450000, "eval_re": [9.684672028647501, 18.664849422627466, 
22.92746932349935, 6.655136828546466, 17.218428802805505, 10.449909758830131, 
11.449025199415747, 24.522056344804213, 27.10605262286291, 14.897034257369588], 
"eval_len": [21, 33, 39, 19, 29, 28, 22, 37, 39, 32]}

 46%|████▌     | 459999/1000000 [7:58:50<6:58:50, 21.49it/s]global step 460000, trans_decision ep_re 16.558056583753846

{"global_step": 460000, "eval_re": [17.917955138787857, 19.211881901235596, 
9.044396507814952, 13.105750660125674, 9.323362333767307, 17.797233683444695, 
13.140469233782607, 25.49760646735415, 16.05788954849818, 24.48402036272743], 
"eval_len": [31, 38, 24, 24, 20, 37, 23, 37, 35, 34]}

 47%|████▋     | 469998/1000000 [8:09:20<6:45:30, 21.78it/s]global step 470000, trans_decision ep_re 42.708453033394726

{"global_step": 470000, "eval_re": [284.2154848927714, 23.74078032470922, 
11.816267140386781, 18.425160782009264, 9.664647930312087, 12.247121300908391, 
10.316470321379175, 17.83142073451793, 21.34489669907727, 17.482280207875682], 
"eval_len": [161, 37, 22, 37, 19, 25, 21, 31, 36, 37]}

 48%|████▊     | 479999/1000000 [8:19:40<6:42:17, 21.54it/s]global step 480000, trans_decision ep_re 15.97275039654951

{"global_step": 480000, "eval_re": [6.730989724572698, 22.56030144668074, 
13.359167766442736, 21.44000067014903, 13.775397827133448, 22.842091673495403, 
22.445279262394397, 12.911464618723775, 7.551229795968693, 16.11158117993417], 
"eval_len": [19, 36, 25, 37, 27, 31, 35, 23, 19, 27]}

 49%|████▉     | 489997/1000000 [8:30:10<6:35:46, 21.48it/s]global step 490000, trans_decision ep_re 13.03422853294401

{"global_step": 490000, "eval_re": [9.044687498552483, 9.361831924718665, 
13.974503227426695, 9.393650060640308, 12.865109244983062, 18.346928574846967, 
17.378541677699666, 9.286768297589685, 14.68315510462847, 16.0071097183541], 
"eval_len": [22, 25, 23, 25, 34, 39, 36, 21, 25, 34]}

 50%|████▉     | 499999/1000000 [8:40:21<6:26:15, 21.57it/s]global step 500000, trans_decision ep_re 30.39295213202353

{"global_step": 500000, "eval_re": [19.135211705025633, 17.833366149968086, 
24.135854258835558, 23.93722193633603, 15.633112085879551, 17.340620656099386, 
21.411381470608713, 26.92678299375755, 118.84378584020764, 18.732184223517145], 
"eval_len": [36, 36, 35, 34, 32, 35, 38, 36, 140, 32]}

 51%|█████     | 509999/1000000 [8:51:00<6:19:44, 21.51it/s]global step 510000, trans_decision ep_re 22.184499010961837

{"global_step": 510000, "eval_re": [12.07431887770604, 27.218870565671462, 
29.230587338174136, 14.096205015124434, 30.39371856685433, 13.307869326274192, 
31.579008171426718, 12.500592972218785, 22.888186292081436, 28.55563298408684], 
"eval_len": [26, 38, 37, 25, 39, 33, 39, 35, 33, 50]}

 52%|█████▏    | 519998/1000000 [9:01:30<6:09:16, 21.66it/s]global step 520000, trans_decision ep_re 36.44265642939113

{"global_step": 520000, "eval_re": [15.492627146881853, 21.506233681838097, 
11.163579202135898, 20.94034571706758, 10.577288453698003, 22.388620996022677, 
10.014758978419026, 16.079630833567393, 8.225373963563447, 228.0381053207173], 
"eval_len": [34, 31, 24, 36, 21, 39, 28, 29, 26, 107]}

 53%|█████▎    | 529999/1000000 [9:11:50<6:03:18, 21.56it/s]global step 530000, trans_decision ep_re 17.08230952116122

{"global_step": 530000, "eval_re": [13.603960628943002, 20.955948959350025, 
17.225530732826062, 11.056920203401745, 18.518678818328528, 23.843264703026176, 
24.388236607676113, 12.29297788899868, 11.25381838928651, 17.683758279775404], 
"eval_len": [27, 34, 37, 35, 30, 39, 36, 25, 24, 35]}

 54%|█████▍    | 539998/1000000 [9:22:20<5:55:13, 21.58it/s]global step 540000, trans_decision ep_re 29.312176268376454

{"global_step": 540000, "eval_re": [10.83349018149226, 14.514943200375027, 
8.723880555784302, 26.742179271060387, 9.006976239724539, 20.826473267488357, 
148.34739618788288, 13.952480933513021, 15.993815605918572, 24.180127240525177],
"eval_len": [31, 35, 22, 37, 27, 36, 94, 34, 34, 48]}

 55%|█████▍    | 549999/1000000 [9:32:30<5:47:05, 21.61it/s]global step 550000, trans_decision ep_re 14.463949900648647

{"global_step": 550000, "eval_re": [14.129665919473451, 17.20010164680084, 
11.872358921876181, 15.714782766765824, 16.511567317445042, 23.595978777075043, 
12.579709152099989, 13.783832684762139, 13.946626772399952, 5.304875047787997], 
"eval_len": [27, 36, 30, 35, 37, 40, 24, 26, 28, 25]}

 56%|█████▌    | 559999/1000000 [9:43:10<5:43:09, 21.37it/s]global step 560000, trans_decision ep_re 22.915437594273335

{"global_step": 560000, "eval_re": [15.847339516335671, 19.231645077783178, 
6.910914026276944, 9.930599986919267, 120.87538749868882, 6.83343018065504, 
8.532592270346981, 14.383757250348516, 9.272348114005831, 17.33636202137307], 
"eval_len": [38, 37, 18, 23, 121, 19, 23, 36, 21, 29]}

 57%|█████▋    | 569997/1000000 [9:53:40<5:33:12, 21.51it/s]global step 570000, trans_decision ep_re 16.535587089707075

{"global_step": 570000, "eval_re": [17.980511410153436, 5.8554642517554365, 
10.730897687365436, 25.310296075898965, 17.80792344257393, 16.330625491773464, 
21.53078194193329, 9.81627723232076, 25.51476898744552, 14.478324375850514], 
"eval_len": [26, 21, 34, 36, 38, 28, 37, 30, 38, 26]}

 58%|█████▊    | 579999/1000000 [10:03:52<5:26:06, 21.47it/s]global step 580000, trans_decision ep_re 32.986322795517864

{"global_step": 580000, "eval_re": [20.11342034223207, 13.185022741973823, 
13.659273869878755, 22.606389547234734, 170.68366387982397, 29.03471363688836, 
11.528132787348461, 12.434495218605027, 17.349268590023698, 19.268847341169757],
"eval_len": [37, 26, 26, 35, 104, 38, 22, 31, 37, 30]}

 59%|█████▉    | 589999/1000000 [10:14:20<5:17:26, 21.53it/s]global step 590000, trans_decision ep_re 51.27635616288103

{"global_step": 590000, "eval_re": [18.431190570824928, 16.98015622750291, 
10.025824729270642, 398.3383518846567, 18.650111607570448, 12.826772242303669, 
12.908294476333822, 7.077619899816067, 9.522439963584509, 8.002800026946595], 
"eval_len": [32, 29, 21, 195, 37, 27, 29, 19, 20, 22]}

 60%|█████▉    | 599999/1000000 [10:25:00<5:11:25, 21.41it/s]global step 600000, trans_decision ep_re 39.475294545766275

{"global_step": 600000, "eval_re": [12.06708405837082, 13.274106640907972, 
19.084081826586868, 19.225962694394696, 23.210543385655264, 11.447858108022526, 
150.94316048886503, 132.95394975874896, 8.81317106804267, 3.733027428067968], 
"eval_len": [23, 27, 35, 39, 38, 23, 100, 152, 19, 17]}

 61%|██████    | 609997/1000000 [10:35:30<5:01:17, 21.57it/s]global step 610000, trans_decision ep_re 39.93552243954755

{"global_step": 610000, "eval_re": [8.133218193085217, 15.87721029138566, 
12.396554632381557, 23.028761650171916, 270.9587106195073, 13.631649170116242, 
17.806103553462723, 21.626604178505865, 6.960515730561266, 8.935896376297688], 
"eval_len": [20, 29, 24, 33, 154, 26, 38, 36, 34, 20]}

 62%|██████▏   | 619998/1000000 [10:46:00<4:52:03, 21.69it/s]global step 620000, trans_decision ep_re 44.907960464156226

{"global_step": 620000, "eval_re": [6.860919773154958, 11.750519833654181, 
14.258672589381414, 12.600222628268945, 19.959372956683183, 29.389403497941352, 
16.59364298371942, 20.56821785549652, 14.676254174745493, 302.42237834851676], 
"eval_len": [23, 25, 27, 25, 30, 38, 37, 36, 26, 160]}

 63%|██████▎   | 629999/1000000 [10:56:11<4:45:40, 21.59it/s]global step 630000, trans_decision ep_re 80.32061408235828

{"global_step": 630000, "eval_re": [9.427571303826978, 12.261109535136319, 
15.376143115585833, 12.518176142331312, 175.8155506324317, 15.825258983671228, 
18.385446102994926, 15.942604165709774, 509.33941499607874, 18.31486584581594], 
"eval_len": [19, 34, 39, 23, 118, 35, 36, 29, 209, 29]}

 64%|██████▍   | 639999/1000000 [11:06:50<4:38:15, 21.56it/s]global step 640000, trans_decision ep_re 16.490367960286694

{"global_step": 640000, "eval_re": [18.217941616312906, 20.522545768661633, 
12.009575343385709, 17.24111438084525, 9.165375531648982, 15.081830342499602, 
24.988843999039066, 20.890551653397985, 15.725501839146833, 11.060399127928964],
"eval_len": [37, 37, 24, 30, 21, 28, 37, 38, 36, 24]}

 65%|██████▍   | 649997/1000000 [11:17:20<4:30:11, 21.59it/s]global step 650000, trans_decision ep_re 40.74417430339658

{"global_step": 650000, "eval_re": [21.60832058264355, 21.230704058345793, 
12.73067445698148, 14.4628323713908, 22.300186803575404, 18.724113191099573, 
20.96877125395511, 249.0085398072817, 18.015403111458486, 8.392197397233902], 
"eval_len": [38, 34, 32, 36, 32, 33, 29, 151, 34, 18]}

 66%|██████▌   | 659999/1000000 [11:27:50<4:24:58, 21.39it/s]global step 660000, trans_decision ep_re 16.81056025153052

{"global_step": 660000, "eval_re": [25.254364746974076, 18.140681953629027, 
16.109896594778647, 9.364657897496366, 11.932164617636587, 9.795021458677693, 
18.921453945104, 12.27231104983073, 20.209039216475105, 26.106011034702956], 
"eval_len": [39, 34, 26, 20, 32, 28, 32, 29, 37, 38]}

 67%|██████▋   | 669999/1000000 [11:38:10<4:15:31, 21.52it/s]global step 670000, trans_decision ep_re 15.875320488070418

{"global_step": 670000, "eval_re": [16.574072388070206, 12.330044109257079, 
18.249800021299354, 10.898479890061271, 10.881011440009908, 9.134511720127342, 
18.206206198593037, 16.754750725091164, 16.557637320579367, 29.16669106761542], 
"eval_len": [38, 30, 37, 33, 26, 27, 35, 33, 30, 37]}

 68%|██████▊   | 679998/1000000 [11:48:40<4:06:05, 21.67it/s]global step 680000, trans_decision ep_re 34.47915637424717

{"global_step": 680000, "eval_re": [15.218798125824575, 13.67067657226693, 
10.068838005571804, 22.72329990840062, 16.875478716355378, 21.20711490238654, 
22.927637908610947, 198.0421055704843, 12.11595636171541, 11.94165767085522], 
"eval_len": [34, 30, 21, 33, 37, 33, 35, 129, 29, 35]}

 69%|██████▉   | 689999/1000000 [11:58:52<3:59:35, 21.56it/s]global step 690000, trans_decision ep_re 14.509303434777149

{"global_step": 690000, "eval_re": [14.0672096277979, 10.079918233599173, 
13.306637110685696, 11.563623590813892, 21.899303021992463, 16.81173370903885, 
19.279941049809636, 8.176026290986597, 19.799906395408012, 10.108735317639269], 
"eval_len": [38, 29, 25, 25, 35, 37, 37, 28, 33, 21]}

 70%|██████▉   | 699999/1000000 [12:09:30<3:53:50, 21.38it/s]global step 700000, trans_decision ep_re 75.1754430112485

{"global_step": 700000, "eval_re": [25.47196944317426, 28.429900861605784, 
15.67064846521293, 591.8287993317211, 19.32655094354375, 19.288225649379584, 
15.287832361452384, 12.914301487016324, 7.085696505422808, 16.45050506395616], 
"eval_len": [40, 39, 30, 228, 38, 34, 26, 24, 23, 39]}

 71%|███████   | 709997/1000000 [12:20:00<3:45:23, 21.44it/s]global step 710000, trans_decision ep_re 68.47910722549045

{"global_step": 710000, "eval_re": [22.46434537322795, 165.59508618926057, 
11.828088210571627, 19.55893548248418, 21.578776204673527, 5.069404824990416, 
20.245528678759467, 257.03822401479545, 18.90961707250369, 142.5030662036377], 
"eval_len": [37, 106, 21, 40, 39, 16, 36, 135, 34, 276]}

 72%|███████▏  | 719998/1000000 [12:30:30<3:35:50, 21.62it/s]global step 720000, trans_decision ep_re 35.48993915240148

{"global_step": 720000, "eval_re": [79.03510257316789, 11.233846699150298, 
158.37949740595167, 8.86002333305558, 25.09706681555073, 17.474568778019467, 
21.06542111136525, 18.013239169937933, 12.11275209903736, 3.627873538778588], 
"eval_len": [74, 29, 77, 18, 49, 32, 37, 29, 23, 18]}

 73%|███████▎  | 729999/1000000 [12:40:41<3:28:03, 21.63it/s]global step 730000, trans_decision ep_re 123.18221468362685

{"global_step": 730000, "eval_re": [20.043592918255204, 530.4586976859666, 
6.922297008561524, 178.67081016981902, 18.906527705072993, 169.9843612550225, 
19.7746528282584, 22.789406293837544, 256.896620501507, 7.375180469967798], 
"eval_len": [39, 210, 21, 99, 31, 96, 37, 35, 172, 26]}

 74%|███████▍  | 739999/1000000 [12:51:20<3:22:02, 21.45it/s]global step 740000, trans_decision ep_re 14.952812780658974

{"global_step": 740000, "eval_re": [22.388310301425342, 14.075026462603736, 
11.571058912981048, 14.213054968833772, 10.792303078503732, 19.02973537842534, 
8.488764619139445, 19.668054355585465, 15.241331193937732, 14.06048853515412], 
"eval_len": [34, 33, 25, 33, 22, 32, 33, 31, 37, 26]}

 75%|███████▍  | 749997/1000000 [13:01:50<3:13:44, 21.51it/s]global step 750000, trans_decision ep_re 14.26381991445264

{"global_step": 750000, "eval_re": [8.892250435071222, 16.040331930909083, 
9.56820586426722, 20.59472251515164, 11.241352633540075, 17.05056088307182, 
6.138822343609657, 12.37375777538928, 19.72613508538265, 21.012059678133742], 
"eval_len": [21, 34, 35, 30, 24, 31, 20, 34, 34, 33]}

 76%|███████▌  | 759998/1000000 [13:12:20<3:04:30, 21.68it/s]global step 760000, trans_decision ep_re 16.18914181065968

{"global_step": 760000, "eval_re": [13.126377837806155, 17.923472955612038, 
11.940524982584662, 7.798537344131149, 17.793296924933333, 15.02025484932012, 
24.678348920238058, 12.980977025588707, 24.609020031265995, 16.020607235116593],
"eval_len": [27, 35, 35, 28, 31, 33, 36, 24, 35, 35]}

 77%|███████▋  | 769998/1000000 [13:22:50<2:57:14, 21.63it/s]global step 770000, trans_decision ep_re 31.899221178657577

{"global_step": 770000, "eval_re": [156.10870537206225, 12.594722477361275, 
20.16018633681438, 19.67400207836084, 24.537874652836283, 20.718109734802486, 
17.55548243195818, 3.5839559129666143, 21.06762342629912, 22.991549363114345], 
"eval_len": [96, 31, 37, 37, 37, 38, 31, 18, 38, 35]}

 78%|███████▊  | 779999/1000000 [13:33:10<2:50:02, 21.56it/s]global step 780000, trans_decision ep_re 18.560789691473772

{"global_step": 780000, "eval_re": [30.557706430517403, 25.26430284328449, 
15.696664072896391, 18.11924180918131, 27.696306916048883, 8.201847382668834, 
11.924704665603592, 17.428581137933165, 24.90352269595337, 5.815018960650289], 
"eval_len": [38, 38, 29, 28, 37, 25, 23, 35, 35, 19]}

 79%|███████▉  | 789997/1000000 [13:43:40<2:41:51, 21.62it/s]global step 790000, trans_decision ep_re 14.307245670430566

{"global_step": 790000, "eval_re": [10.48953331835925, 8.789629913571353, 
11.374152396916918, 16.538751293278423, 5.3391475028465205, 24.11572194609638, 
16.655736925770835, 15.191024467383492, 21.190107147598457, 13.38865179248404], 
"eval_len": [21, 19, 26, 34, 30, 40, 37, 36, 30, 25]}

 80%|███████▉  | 799999/1000000 [13:53:53<2:35:43, 21.41it/s]global step 800000, trans_decision ep_re 15.39299557890887

{"global_step": 800000, "eval_re": [24.826779982437277, 14.731778259054432, 
12.756012618910672, 21.39119713974423, 9.873267899020538, 11.37813509834122, 
13.464303053879947, 13.55422864228662, 11.499469468388769, 20.454783627024995], 
"eval_len": [37, 27, 27, 33, 21, 25, 23, 35, 20, 35]}

 81%|████████  | 809999/1000000 [14:04:21<2:27:50, 21.42it/s]global step 810000, trans_decision ep_re 35.71965389003067

{"global_step": 810000, "eval_re": [25.63053428591393, 21.57074213011813, 
195.74708895864086, 17.547395352869227, 8.401893872237556, 13.934235061956283, 
21.172273844175226, 11.945061285784115, 24.42092770475039, 16.826386403860994], 
"eval_len": [39, 33, 142, 38, 19, 26, 30, 22, 34, 31]}

 82%|████████▏ | 819999/1000000 [14:15:00<2:19:12, 21.55it/s]global step 820000, trans_decision ep_re 14.623256066329692

{"global_step": 820000, "eval_re": [7.96670290714202, 13.621701841598108, 
12.783117326528009, 7.149970185865928, 15.051176391049633, 7.93519352853078, 
25.990217980901832, 23.537867207153838, 20.960887027830907, 11.235726266695846],
"eval_len": [21, 32, 25, 18, 31, 18, 39, 39, 38, 21]}

 83%|████████▎ | 829998/1000000 [14:25:30<2:10:01, 21.79it/s]global step 830000, trans_decision ep_re 35.583447664809704

{"global_step": 830000, "eval_re": [14.517017669983645, 14.184367083355397, 
21.969448678186314, 27.867333087869014, 12.299891563007133, 187.01144919631332, 
15.988734821507652, 14.444617562224902, 18.33943617758465, 29.212180808065057], 
"eval_len": [35, 37, 32, 41, 22, 124, 26, 24, 31, 40]}

 84%|████████▍ | 839999/1000000 [14:35:42<2:04:09, 21.48it/s]global step 840000, trans_decision ep_re 77.66163319382554

{"global_step": 840000, "eval_re": [162.30590354620458, 315.5416297611922, 
18.969867145522553, 202.73286588411932, 10.599612159313041, 25.129575092488807, 
18.329024792115835, 4.464862188670122, 7.25164207264555, 11.291349295983428], 
"eval_len": [85, 162, 30, 132, 32, 39, 35, 16, 20, 25]}

 85%|████████▍ | 849999/1000000 [14:46:20<1:56:17, 21.50it/s]global step 850000, trans_decision ep_re 102.60902238915078

{"global_step": 850000, "eval_re": [15.923579784201706, 6.6563229056185165, 
13.473466175293318, 9.755324693994988, 11.431316351434262, 8.53010148089492, 
8.156570578482546, 801.1508264463752, 138.4147505454543, 12.597964929758], 
"eval_len": [37, 27, 33, 23, 31, 17, 24, 362, 102, 35]}

 86%|████████▌ | 859999/1000000 [14:56:50<1:48:33, 21.49it/s]global step 860000, trans_decision ep_re 42.787694757409746

{"global_step": 860000, "eval_re": [17.5038865667781, 20.082036608849062, 
291.2240461665656, 11.619435653337836, 8.206330851278244, 22.054303447562642, 
19.333205642238, 11.3167009045554, 8.82505048396505, 17.711951248967544], 
"eval_len": [31, 34, 193, 22, 24, 33, 28, 25, 29, 36]}

 87%|████████▋ | 869997/1000000 [15:07:21<1:40:32, 21.55it/s]global step 870000, trans_decision ep_re 17.59497583969913

{"global_step": 870000, "eval_re": [22.383713137851604, 14.04778878458681, 
26.555881818318166, 14.264466490879425, 9.44153485663649, 21.778346914837698, 
26.163500815406884, 13.766149170549756, 13.771946676518139, 13.776429731406328],
"eval_len": [38, 32, 35, 37, 26, 37, 37, 30, 31, 26]}

 88%|████████▊ | 879999/1000000 [15:17:33<1:32:10, 21.70it/s]global step 880000, trans_decision ep_re 30.122455028068316

{"global_step": 880000, "eval_re": [8.623582750177624, 10.48443194536849, 
15.80700215172471, 13.865983525673235, 8.287259107009557, 16.978164572939573, 
25.097567687185144, 168.70079921241634, 17.42643656338331, 15.953322764805147], 
"eval_len": [24, 22, 25, 36, 34, 36, 36, 100, 31, 29]}

 89%|████████▉ | 889999/1000000 [15:28:11<1:25:33, 21.43it/s]global step 890000, trans_decision ep_re 46.04078432420782

{"global_step": 890000, "eval_re": [17.305573188237176, 35.22419759348934, 
22.781896232141815, 5.73578540761393, 18.490235321788674, 293.0042979335935, 
19.47078516680912, 27.408870593646526, 8.356986087434663, 12.62921571732343], 
"eval_len": [30, 54, 40, 18, 35, 154, 35, 39, 22, 25]}

 90%|████████▉ | 899997/1000000 [15:38:41<1:17:17, 21.57it/s]global step 900000, trans_decision ep_re 17.132119229208815

{"global_step": 900000, "eval_re": [11.24316325017587, 19.552708785489916, 
13.886177737008968, 19.355043635580724, 18.76892615849735, 16.292503896874422, 
22.922755551460387, 17.005948039266702, 21.786720581112537, 10.507244656621248],
"eval_len": [24, 39, 35, 34, 38, 30, 36, 29, 35, 34]}

 91%|█████████ | 909998/1000000 [15:49:11<1:09:04, 21.72it/s]global step 910000, trans_decision ep_re 15.060680575908275

{"global_step": 910000, "eval_re": [7.244540456774219, 19.21179350334374, 
8.640578565233458, 20.43808846626423, 21.19826455985817, 20.2176243821881, 
25.006195598357326, 7.324586919525643, 10.940566725681888, 10.38456658185602], 
"eval_len": [25, 36, 21, 35, 34, 36, 37, 18, 21, 35]}

 92%|█████████▏| 919998/1000000 [15:59:41<1:01:46, 21.58it/s]global step 920000, trans_decision ep_re 99.21424710990293

{"global_step": 920000, "eval_re": [19.554777242480082, 25.67196722611802, 
15.457425559482232, 26.822322548850867, 13.116087370483296, 278.63424786316875, 
556.3518438373231, 15.41561177614389, 16.908469050401564, 24.209718624577388], 
"eval_len": [31, 50, 27, 36, 35, 158, 219, 28, 35, 35]}

 93%|█████████▎| 929997/1000000 [16:10:11<54:35, 21.37it/s]global step 930000, trans_decision ep_re 10.729869466930179

{"global_step": 930000, "eval_re": [11.674978926511214, 11.42602878770411, 
14.367060282779988, 14.251298683425832, 2.3249657038454545, 7.159423131501566, 
11.686734145098784, 12.553077737764582, 10.821326468313003, 11.033800802357268],
"eval_len": [22, 26, 24, 25, 24, 17, 23, 34, 24, 31]}

 94%|█████████▍| 939999/1000000 [16:20:32<47:11, 21.19it/s]global step 940000, trans_decision ep_re 44.73692006016891

{"global_step": 940000, "eval_re": [17.615975810699176, 161.80311045818144, 
26.746543428512283, 16.87890377777704, 19.583271509394237, 22.258539341484198, 
24.611045993533445, 9.50305782849828, 25.84197305265305, 122.52677940095602], 
"eval_len": [35, 122, 35, 27, 35, 39, 36, 21, 39, 125]}

 95%|█████████▍| 949999/1000000 [16:31:21<39:08, 21.29it/s]global step 950000, trans_decision ep_re 17.68502491314272

{"global_step": 950000, "eval_re": [13.478582377341045, 10.806130494329743, 
15.752878656823702, 25.043369946962823, 18.75357017514524, 17.536280729129565, 
18.79709100687976, 19.13071268114286, 9.546333724266184, 28.005299339406275], 
"eval_len": [36, 23, 31, 36, 35, 31, 36, 34, 26, 36]}

 96%|█████████▌| 959999/1000000 [16:42:01<31:22, 21.25it/s]global step 960000, trans_decision ep_re 13.851171913397668

{"global_step": 960000, "eval_re": [12.731630254268772, 21.182225918203255, 
11.772515614071217, 10.895481901315307, 10.129432888156357, 12.129437713606464, 
12.593844575822233, 19.592631267898376, 19.063371741692364, 8.421147258942318], 
"eval_len": [29, 36, 34, 25, 35, 26, 29, 36, 34, 32]}

 97%|█████████▋| 969998/1000000 [16:52:31<23:19, 21.43it/s]global step 970000, trans_decision ep_re 41.93827345776481

{"global_step": 970000, "eval_re": [19.984486113330455, 18.071941336015733, 
19.10895289602304, 7.167530327067572, 19.572887317985742, 20.431533852932528, 
8.72517771274915, 259.62221292971697, 22.448478978025474, 24.249533113801423], 
"eval_len": [36, 28, 37, 21, 35, 37, 31, 149, 34, 38]}

 98%|█████████▊| 979999/1000000 [17:03:11<15:48, 21.09it/s]global step 980000, trans_decision ep_re 13.327202018385936

{"global_step": 980000, "eval_re": [14.840618779489043, 7.687896494585161, 
16.978766062420952, 11.874364497996853, 6.865029140810629, 17.752611105871818, 
21.169820935055416, 13.910067814009142, 10.634113632473992, 11.558731721146334],
"eval_len": [25, 22, 34, 27, 25, 37, 38, 26, 22, 26]}

 99%|█████████▉| 989999/1000000 [17:13:21<07:39, 21.78it/s]global step 990000, trans_decision ep_re 13.940220017854779

{"global_step": 990000, "eval_re": [21.883237313732312, 15.491707301233085, 
8.291075746845072, 11.789031824234048, 20.12381510754004, 16.50583046892042, 
9.31413299152301, 8.966808844259102, 11.813642802339176, 15.222917777921511], 
"eval_len": [33, 37, 18, 23, 33, 31, 29, 34, 36, 31]}

100%|█████████▉| 999999/1000000 [17:23:51<00:00, 21.86it/s]global step 1000000, trans_decision ep_re 19.617204430161795

{"global_step": 1000000, "eval_re": [17.86554516553322, 28.323282387164536, 
22.104023376172982, 8.542207543848766, 27.409069969186294, 20.250314069419044, 
20.307049545240456, 22.60723125657233, 16.956856130545344, 11.806464857934959], 
"eval_len": [34, 38, 35, 28, 37, 37, 31, 40, 37, 33]}

100%|██████████| 1000000/1000000 [17:23:57<00:00, 15.96it/s]
