
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.2
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9997/1000000 [05:30<12:50:46, 21.41it/s]global step 10000, trans_decision ep_re 25.148828473921597

{"global_step": 10000, "eval_re": [15.624390731785159, 32.81839256338542, 
17.049598424665604, 35.94944567414154, 14.748495471544574, 10.651317822709055, 
12.625707062474039, 71.94858975540045, 16.897879670272868, 23.174467562837265], 
"eval_len": [20, 26, 23, 29, 15, 14, 17, 67, 20, 28]}

  2%|▏         | 19999/1000000 [16:00<12:51:37, 21.17it/s]global step 20000, trans_decision ep_re 30.276633750643676

{"global_step": 20000, "eval_re": [8.161909714918146, 13.365091515716788, 
87.20231778269681, 56.77941025660605, 13.795854434910144, 24.28895301405534, 
25.126291885798267, 45.033525215386135, 14.95496821252531, 14.058015473823739], 
"eval_len": [19, 16, 57, 68, 31, 23, 32, 64, 34, 25]}

  3%|▎         | 29999/1000000 [26:40<12:45:31, 21.12it/s]global step 30000, trans_decision ep_re 22.880505927596158

{"global_step": 30000, "eval_re": [27.031157385598362, 17.423537929962247, 
13.358929474445526, 30.078527548139355, 13.797723770224934, 8.755662644308831, 
84.80135390102573, 13.127063773535516, 9.460506751104699, 10.970596097616394], 
"eval_len": [24, 17, 27, 28, 16, 38, 60, 22, 11, 15]}

  4%|▍         | 39999/1000000 [37:10<12:38:27, 21.10it/s]global step 40000, trans_decision ep_re 28.779575396042805

{"global_step": 40000, "eval_re": [12.896561840517942, 22.245969265718756, 
23.239330965728133, 26.327921584956023, 16.96322844122911, 15.804918157148789, 
30.030977748936667, 30.030579662678246, 96.17179298879347, 14.084473304720904], 
"eval_len": [16, 23, 34, 31, 19, 18, 47, 29, 125, 24]}

  5%|▍         | 49998/1000000 [47:50<12:20:11, 21.39it/s]global step 50000, trans_decision ep_re 65.15964702288444

{"global_step": 50000, "eval_re": [33.25914320483525, 92.28123343577124, 
62.03834730800579, 34.508139325987294, 9.284742519136914, 51.890556381787846, 
148.8673763272978, 118.58818616359183, 91.77327505438086, 9.105470508049578], 
"eval_len": [34, 59, 71, 28, 11, 76, 85, 77, 68, 13]}

  6%|▌         | 59999/1000000 [58:11<12:17:13, 21.25it/s]global step 60000, trans_decision ep_re 21.13106132992761

{"global_step": 60000, "eval_re": [13.707943427309097, 17.35618761156188, 
26.419784979926877, 14.035229970885805, 47.281942708664815, 30.777796637915994, 
12.155821593672687, 9.989808499675984, 20.842944613583256, 18.74315325607973], 
"eval_len": [24, 23, 28, 19, 56, 27, 16, 16, 28, 23]}

  7%|▋         | 69999/1000000 [1:08:41<12:03:02, 21.44it/s]global step 70000, trans_decision ep_re 30.82679619739203

{"global_step": 70000, "eval_re": [78.21558253857654, 12.925015989676037, 
21.282796928294413, 17.3464675810075, 28.528866254234874, 21.372264814976937, 
81.18596614251305, 14.397335586442072, 8.31127623040858, 24.702389907790316], 
"eval_len": [54, 17, 22, 25, 26, 25, 102, 16, 12, 31]}

  8%|▊         | 79999/1000000 [1:19:20<12:00:17, 21.29it/s]global step 80000, trans_decision ep_re 24.384636655639845

{"global_step": 80000, "eval_re": [20.739298662898914, 15.27650141640604, 
30.160697370092727, 27.024804739581437, 18.03590321478643, 18.04662934420904, 
55.0070887933211, 23.26828243199143, 20.00852455063385, 16.27863603247744], 
"eval_len": [24, 19, 27, 28, 19, 23, 60, 29, 23, 20]}

  9%|▉         | 89999/1000000 [1:29:40<11:48:47, 21.40it/s]global step 90000, trans_decision ep_re 25.40221943878578

{"global_step": 90000, "eval_re": [10.603792989772419, 12.975511365820582, 
11.583010810687332, 16.672892443683924, 139.66927312891855, 14.715771383175724, 
10.45788002725278, 14.981288888224784, 13.025040164450656, 9.337733185871054], 
"eval_len": [21, 16, 13, 20, 97, 15, 15, 20, 18, 13]}

 10%|▉         | 99998/1000000 [1:40:30<11:30:48, 21.71it/s]global step 100000, trans_decision ep_re 16.251790209712407

{"global_step": 100000, "eval_re": [16.496515885013306, 32.559382050467626, 
15.379526434868737, 9.18607862869753, 12.968756683105727, 15.978193522050631, 
17.565557490083712, 14.841818140052995, 10.63207116750762, 16.910002095276194], 
"eval_len": [20, 29, 26, 15, 26, 25, 26, 21, 22, 23]}

 11%|█         | 109999/1000000 [1:50:50<11:25:32, 21.64it/s]global step 110000, trans_decision ep_re 25.68010889570943

{"global_step": 110000, "eval_re": [9.682065330056206, 82.4810820389932, 
30.143318852825626, 16.03629099641832, 21.354785917159745, 28.126016529605657, 
18.4209233684697, 22.09939579111445, 7.770145065833118, 20.687065066618306], 
"eval_len": [14, 71, 35, 18, 30, 37, 20, 26, 14, 29]}

 12%|█▏        | 119999/1000000 [2:01:20<11:22:42, 21.48it/s]global step 120000, trans_decision ep_re 33.564317232034306

{"global_step": 120000, "eval_re": [35.70869748092117, 10.370336086019652, 
12.700908389742176, 56.90233773989742, 20.62380407240461, 18.48933572983904, 
8.026597258524573, 145.07653901844526, 7.831189334399865, 19.91342721014935], 
"eval_len": [42, 16, 20, 47, 35, 33, 13, 73, 11, 30]}

 13%|█▎        | 129999/1000000 [2:11:50<11:20:30, 21.31it/s]global step 130000, trans_decision ep_re 34.388010181271284

{"global_step": 130000, "eval_re": [86.18314281963569, 60.61692118716924, 
69.9506218221753, 23.239323091774022, 22.319781746667328, 19.01219421014129, 
17.419502614724287, 23.251669874230586, 13.10963510096279, 8.777309345232327], 
"eval_len": [65, 55, 43, 29, 25, 28, 19, 30, 26, 14]}

 14%|█▍        | 139999/1000000 [2:22:20<11:11:08, 21.36it/s]global step 140000, trans_decision ep_re 44.52503041157242

{"global_step": 140000, "eval_re": [25.25615036936865, 14.015278586881738, 
19.735929553213996, 155.0594462729407, 12.436108838206179, 22.38224060316415, 
19.849964450784714, 118.22771850993374, 12.54772082700195, 45.73974610422838], 
"eval_len": [27, 22, 22, 95, 17, 27, 27, 79, 22, 50]}

 15%|█▍        | 149999/1000000 [2:32:50<11:06:41, 21.25it/s]global step 150000, trans_decision ep_re 24.888005088939167

{"global_step": 150000, "eval_re": [11.40747569046185, 57.83048173912421, 
16.802758467332016, 27.20377944485144, 30.51311345680911, 22.658619883623327, 
39.5803294043035, 11.765112147139508, 15.498757668503632, 15.619622987243062], 
"eval_len": [16, 77, 20, 25, 33, 27, 38, 16, 21, 21]}

 16%|█▌        | 159999/1000000 [2:43:20<10:57:29, 21.29it/s]global step 160000, trans_decision ep_re 69.17089056189981

{"global_step": 160000, "eval_re": [266.5383131402028, 24.253499155694712, 
16.44422525798973, 17.65980887814636, 17.281652772056784, 16.34249668096422, 
20.70673602212242, 275.3301511557377, 18.252687049533318, 18.899335506550102], 
"eval_len": [113, 25, 19, 26, 19, 18, 21, 108, 20, 20]}

 17%|█▋        | 169997/1000000 [2:53:50<10:46:09, 21.41it/s]global step 170000, trans_decision ep_re 19.50317251329394

{"global_step": 170000, "eval_re": [8.87852901276385, 14.230557424576014, 
38.828272402700485, 10.839619515777533, 18.973930346785206, 12.656074672283793, 
15.592579846897495, 15.7149236822088, 24.486623027148134, 34.830615201798096], 
"eval_len": [12, 22, 37, 24, 24, 14, 31, 21, 30, 29]}

 18%|█▊        | 179998/1000000 [3:04:20<10:24:46, 21.87it/s]global step 180000, trans_decision ep_re 18.022174583914012

{"global_step": 180000, "eval_re": [14.526287914094056, 26.57080373589343, 
24.92953854281496, 11.763672745522223, 10.702124119927463, 33.241900117897046, 
21.604412199084816, 16.81825906860983, 10.632020093836235, 9.432727301460071], 
"eval_len": [17, 32, 23, 17, 14, 32, 29, 19, 13, 13]}

 19%|█▉        | 189999/1000000 [3:14:31<10:32:26, 21.35it/s]global step 190000, trans_decision ep_re 22.39689263910265

{"global_step": 190000, "eval_re": [18.027637422348008, 10.210944320091485, 
26.461845505132292, 10.110108295671985, 29.14305471971153, 18.765446315399572, 
19.065417725486288, 22.46538575351665, 30.95040492201025, 38.768681411658434], 
"eval_len": [28, 14, 36, 25, 24, 28, 20, 35, 30, 43]}

 20%|█▉        | 199999/1000000 [3:25:01<10:24:59, 21.33it/s]global step 200000, trans_decision ep_re 24.621020460566747

{"global_step": 200000, "eval_re": [19.480653879937194, 27.786496711780778, 
15.213258051065688, 14.437996072017857, 16.25599140236915, 10.147677427218479, 
7.2368421853293645, 17.090820150708335, 19.353248858277023, 99.2072198669636], 
"eval_len": [18, 27, 19, 26, 21, 14, 35, 20, 27, 68]}

 21%|██        | 209998/1000000 [3:35:50<10:17:02, 21.34it/s]global step 210000, trans_decision ep_re 16.524275041938257

{"global_step": 210000, "eval_re": [26.475739604034757, 16.20447748387398, 
15.978009783798168, 12.719181687151465, 14.718852535458105, 14.356245110348704, 
10.872627022525212, 14.65333143576034, 24.23677025984503, 15.027515496586796], 
"eval_len": [41, 21, 18, 17, 27, 19, 16, 16, 33, 20]}

 22%|██▏       | 219997/1000000 [3:46:20<10:08:09, 21.38it/s]global step 220000, trans_decision ep_re 31.151791654220393

{"global_step": 220000, "eval_re": [124.35351631451172, 16.969935941940975, 
16.687434413011193, 24.428921522169436, 20.576322409441044, 17.394861956157037, 
28.136793333218048, 13.972824676917234, 26.156344078461565, 22.840961896375603],
"eval_len": [84, 20, 24, 37, 37, 21, 25, 25, 23, 21]}

 23%|██▎       | 229999/1000000 [3:56:50<10:00:34, 21.37it/s]global step 230000, trans_decision ep_re 25.75873718184681

{"global_step": 230000, "eval_re": [36.17489678423912, 12.238209418251532, 
30.542664089952346, 25.47798518377929, 10.019325643138147, 17.217139141242694, 
24.77105079862976, 23.953987817529104, 43.91786650227117, 33.27424643943493], 
"eval_len": [38, 21, 29, 31, 12, 24, 25, 24, 35, 30]}

 24%|██▍       | 239999/1000000 [4:07:20<9:51:43, 21.41it/s]global step 240000, trans_decision ep_re 35.850762094807585

{"global_step": 240000, "eval_re": [14.858242461889226, 19.188352890911926, 
12.59003787440648, 115.8181128856402, 14.62428999501224, 18.431617253521868, 
9.991671485579174, 23.151119232824804, 28.234765533132983, 101.61941133515693], 
"eval_len": [18, 18, 20, 86, 16, 21, 14, 24, 33, 85]}

 25%|██▍       | 249999/1000000 [4:17:42<9:48:20, 21.25it/s]global step 250000, trans_decision ep_re 31.459941310208883

{"global_step": 250000, "eval_re": [63.13870666197515, 11.125842679127777, 
78.40405710085967, 24.311426494826947, 12.836988498076071, 14.492008556100036, 
17.668295024521694, 65.83031007522696, 14.36960690484564, 12.42217110652887], 
"eval_len": [71, 13, 49, 29, 16, 26, 20, 64, 32, 14]}

 26%|██▌       | 259998/1000000 [4:28:30<9:32:17, 21.55it/s]global step 260000, trans_decision ep_re 32.11762178388077

{"global_step": 260000, "eval_re": [157.69186326278668, 11.449655419255217, 
12.330399538308313, 21.99519317928277, 10.841582348759447, 26.610844932536573, 
9.907475885630861, 35.3753288059281, 11.49194207519333, 23.481932391126385], 
"eval_len": [92, 18, 18, 28, 40, 26, 16, 34, 17, 27]}

 27%|██▋       | 269999/1000000 [4:39:00<9:30:18, 21.33it/s]global step 270000, trans_decision ep_re 16.852136599249842

{"global_step": 270000, "eval_re": [9.956191635815753, 24.21934104506861, 
17.704942448905257, 16.98573370122362, 11.640600628858595, 12.963821301535853, 
9.89068951622399, 17.865296761342268, 24.67490222838485, 22.619846725139624], 
"eval_len": [14, 32, 21, 17, 18, 21, 29, 28, 35, 23]}

 28%|██▊       | 279999/1000000 [4:49:30<9:27:02, 21.16it/s]global step 280000, trans_decision ep_re 34.131122690592626

{"global_step": 280000, "eval_re": [19.956747840172717, 10.83275366088422, 
20.151614309621355, 86.56071164571574, 10.593354425689489, 20.917673116870297, 
129.65505130671315, 15.857783327224071, 14.317555810574165, 12.46798146246105], 
"eval_len": [21, 14, 27, 69, 20, 22, 80, 21, 19, 31]}

 29%|██▉       | 289999/1000000 [5:00:00<9:10:27, 21.50it/s]global step 290000, trans_decision ep_re 35.4338502866171

{"global_step": 290000, "eval_re": [7.51038751969933, 16.16842219225042, 
45.86241894773361, 35.88157931732764, 12.525804470736704, 83.06166064697166, 
85.54580773307727, 29.724205368242135, 11.573806257907554, 26.484410412224708], 
"eval_len": [10, 22, 72, 31, 18, 78, 59, 32, 16, 29]}

 30%|██▉       | 299999/1000000 [5:10:20<9:08:29, 21.27it/s]global step 300000, trans_decision ep_re 31.004587906537193

{"global_step": 300000, "eval_re": [69.98944473075608, 15.931106517969885, 
17.08831112435196, 43.02077756923218, 20.730434402259885, 24.448333906950758, 
21.28880330945287, 61.406929769531935, 16.450859680731444, 19.690878054134963], 
"eval_len": [79, 27, 22, 39, 33, 36, 23, 47, 23, 27]}

 31%|███       | 309999/1000000 [5:21:00<8:56:13, 21.45it/s]global step 310000, trans_decision ep_re 21.12021768735591

{"global_step": 310000, "eval_re": [24.04113992574162, 17.859557785327503, 
22.50338555661812, 9.016789621577615, 30.64421055190761, 20.71924834213243, 
27.83843682854109, 30.189425418789142, 12.363068500217619, 16.026914342706345], 
"eval_len": [26, 27, 25, 11, 41, 23, 32, 40, 18, 16]}

 32%|███▏      | 319998/1000000 [5:31:30<8:45:39, 21.56it/s]global step 320000, trans_decision ep_re 23.576416428699208

{"global_step": 320000, "eval_re": [16.58939161755303, 75.05558401572108, 
11.746793386394552, 16.769458745975207, 14.256798615307623, 18.833769219279667, 
22.535927312701606, 17.660871622291392, 16.193556792847275, 26.1220129589206], 
"eval_len": [19, 47, 17, 20, 19, 23, 29, 29, 21, 27]}

 33%|███▎      | 329999/1000000 [5:41:41<8:37:10, 21.59it/s]global step 330000, trans_decision ep_re 42.50860532577012

{"global_step": 330000, "eval_re": [170.70813449209473, 19.950397220093716, 
125.20738458677262, 12.049803536605836, 18.940861608126525, 15.134878694894633, 
12.374522603272322, 24.27979392224333, 13.486963933513469, 12.95331266008396], 
"eval_len": [99, 32, 89, 16, 27, 17, 16, 23, 14, 17]}

 34%|███▍      | 339999/1000000 [5:52:20<8:35:05, 21.36it/s]global step 340000, trans_decision ep_re 31.22597153393185

{"global_step": 340000, "eval_re": [142.45205754123157, 19.039697067762187, 
15.65368900111271, 12.840581950134846, 11.239891262545752, 17.013853346931285, 
11.169077494962167, 40.80275757494133, 28.188654822542162, 13.859455277154552], 
"eval_len": [96, 19, 19, 22, 23, 22, 13, 35, 28, 16]}

 35%|███▍      | 349998/1000000 [6:02:50<8:17:58, 21.76it/s]global step 350000, trans_decision ep_re 46.687330328957486

{"global_step": 350000, "eval_re": [21.85578244299046, 97.61771491328177, 
21.366020566365197, 160.4854941284588, 14.122137605154013, 56.08409360557417, 
14.82901220024005, 55.7539732216181, 14.74551973832361, 10.013554867568752], 
"eval_len": [29, 84, 27, 87, 18, 60, 16, 52, 25, 20]}

 36%|███▌      | 359999/1000000 [6:13:01<8:17:21, 21.45it/s]global step 360000, trans_decision ep_re 36.97484877973614

{"global_step": 360000, "eval_re": [16.27869780847747, 13.82581692594269, 
19.490641107776423, 30.251188078801672, 228.5719409818367, 10.375562931734521, 
11.858474370098778, 14.827511897480807, 14.648793741736993, 9.619859953475354], 
"eval_len": [22, 18, 23, 26, 119, 14, 19, 19, 21, 36]}

 37%|███▋      | 369999/1000000 [6:23:40<8:05:11, 21.64it/s]global step 370000, trans_decision ep_re 28.833118851057215

{"global_step": 370000, "eval_re": [11.58295056205152, 10.233434100900848, 
16.169333221100658, 9.496014236920256, 22.45953204362256, 162.83467039487292, 
12.970300908363253, 12.55797380331051, 12.080709714598767, 17.946269524830875], 
"eval_len": [18, 16, 17, 13, 35, 96, 15, 15, 21, 19]}

 38%|███▊      | 379998/1000000 [6:34:10<7:55:58, 21.71it/s]global step 380000, trans_decision ep_re 28.382272934995854

{"global_step": 380000, "eval_re": [14.550737913594707, 19.124187217307767, 
40.91091170908307, 28.078759884826763, 39.03347043662719, 35.65785726374028, 
22.305820738112626, 21.521035321066098, 19.12470916722141, 43.51523969837868], 
"eval_len": [17, 22, 34, 32, 37, 45, 29, 22, 21, 36]}

 39%|███▉      | 389999/1000000 [6:44:20<7:51:45, 21.55it/s]global step 390000, trans_decision ep_re 36.94349933675976

{"global_step": 390000, "eval_re": [20.160985968687346, 26.963040453210454, 
16.4769172542911, 11.156567954261606, 15.142422216647534, 90.79675839555655, 
29.273212871606486, 28.29483757412871, 36.38585305523899, 94.78439762396873], 
"eval_len": [28, 24, 18, 16, 22, 52, 36, 43, 48, 67]}

 40%|███▉      | 399999/1000000 [6:55:00<7:48:33, 21.34it/s]global step 400000, trans_decision ep_re 17.156584726408038

{"global_step": 400000, "eval_re": [14.284078603343351, 15.307420215123347, 
9.390227362603872, 23.361721172599268, 12.075678566703065, 16.322403711692925, 
19.12697828069213, 17.258835079630718, 10.778151703094625, 33.66035256859709], 
"eval_len": [15, 20, 13, 38, 16, 22, 20, 21, 15, 33]}

 41%|████      | 409999/1000000 [7:05:30<7:40:23, 21.36it/s]global step 410000, trans_decision ep_re 38.29691880883145

{"global_step": 410000, "eval_re": [16.299783264946196, 12.723886845724467, 
35.71994264780499, 91.18370992110898, 14.61540563270339, 24.43059483331693, 
33.44544152246526, 13.099300751141593, 107.61878761979676, 33.83233504930596], 
"eval_len": [26, 20, 37, 67, 16, 33, 36, 18, 70, 42]}

 42%|████▏     | 419999/1000000 [7:16:00<7:31:14, 21.42it/s]global step 420000, trans_decision ep_re 36.69494191207164

{"global_step": 420000, "eval_re": [23.56121106883447, 22.96768554315174, 
9.864434626563638, 21.545655347663633, 12.13586493632702, 74.44357126130839, 
11.627312867576766, 26.688882185692588, 137.24867728066522, 26.86612400293296], 
"eval_len": [30, 27, 15, 27, 19, 83, 14, 34, 92, 29]}

 43%|████▎     | 429999/1000000 [7:26:30<7:20:23, 21.57it/s]global step 430000, trans_decision ep_re 30.10816190077926

{"global_step": 430000, "eval_re": [12.821240059545062, 20.4148854243112, 
18.077973414710666, 30.668589946440722, 18.749580688528198, 18.213475192586195, 
10.72458604088895, 114.02854885549223, 46.6610466159757, 10.7216927693137], 
"eval_len": [17, 25, 21, 27, 18, 25, 18, 92, 42, 21]}

 44%|████▍     | 439998/1000000 [7:37:00<7:06:59, 21.86it/s]global step 440000, trans_decision ep_re 52.43595863475737

{"global_step": 440000, "eval_re": [14.919096976359691, 12.530844044372328, 
237.03468385405714, 11.756180209967626, 14.019920260329188, 8.050616879147315, 
8.532629003964212, 22.27515011067778, 164.6257502540868, 30.61471475461158], 
"eval_len": [24, 19, 109, 18, 14, 16, 11, 27, 107, 32]}

 45%|████▍     | 449998/1000000 [7:47:30<7:04:40, 21.58it/s]global step 450000, trans_decision ep_re 41.585159718143686

{"global_step": 450000, "eval_re": [19.483890450430337, 11.428122934179255, 
16.926082667818676, 23.266542237005854, 16.255761380358063, 104.99994037296412, 
32.861146499936275, 161.1159819728365, 13.05158235337914, 16.46254631252866], 
"eval_len": [23, 18, 27, 37, 19, 81, 38, 96, 15, 37]}

 46%|████▌     | 459998/1000000 [7:58:00<6:52:25, 21.82it/s]global step 460000, trans_decision ep_re 15.727559231127582

{"global_step": 460000, "eval_re": [15.353759323887404, 11.711611710117968, 
13.098172609769602, 26.53859025041223, 20.425163698643637, 24.083390283025178, 
9.13797250171082, 13.040404386013503, 12.407947664068589, 11.478579883626905], 
"eval_len": [25, 22, 17, 30, 31, 28, 20, 17, 18, 14]}

 47%|████▋     | 469999/1000000 [8:08:20<6:49:40, 21.56it/s]global step 470000, trans_decision ep_re 31.86177574717812

{"global_step": 470000, "eval_re": [33.386306529453236, 11.535442348496115, 
31.539748310270326, 17.442496668979857, 21.924853715790398, 115.70394673305209, 
14.834726200544107, 15.92228205976831, 28.35757008355756, 27.97038482186921], 
"eval_len": [47, 13, 35, 25, 34, 90, 22, 18, 30, 28]}

 48%|████▊     | 479999/1000000 [8:18:50<6:44:20, 21.43it/s]global step 480000, trans_decision ep_re 28.21081220081128

{"global_step": 480000, "eval_re": [8.573285765575992, 15.722348491379625, 
16.135709157565003, 101.99014363344284, 15.207051888423791, 11.171137428384194, 
10.926629450587981, 24.416808859228684, 16.07070235801165, 61.89430497551305], 
"eval_len": [11, 18, 17, 64, 26, 22, 15, 28, 23, 60]}

 49%|████▉     | 489998/1000000 [8:29:20<6:35:58, 21.47it/s]global step 490000, trans_decision ep_re 35.83095974453383

{"global_step": 490000, "eval_re": [27.06692695867912, 11.752656364247496, 
111.18473699026107, 11.899492821910224, 18.88519480627464, 27.440742302808218, 
22.988637950173484, 92.21270073629003, 24.578545012373333, 10.299963502320656], 
"eval_len": [25, 16, 76, 18, 19, 33, 39, 67, 46, 15]}

 50%|████▉     | 499999/1000000 [8:39:40<6:25:39, 21.61it/s]global step 500000, trans_decision ep_re 29.71743210058517

{"global_step": 500000, "eval_re": [11.206262282203948, 10.96848989566902, 
98.71024794021838, 94.02676972668348, 12.401054020626194, 14.473625819472186, 
13.652610777557221, 17.934286226367494, 10.575112164978533, 13.225862152075255],
"eval_len": [16, 17, 74, 79, 18, 32, 22, 18, 14, 23]}

 51%|█████     | 509999/1000000 [8:50:10<6:22:43, 21.34it/s]global step 510000, trans_decision ep_re 35.98569397448173

{"global_step": 510000, "eval_re": [118.37764955106202, 13.492262710441281, 
13.522904661182773, 16.984424047092848, 100.00176761627652, 19.240085845992848, 
29.998481632337764, 28.067759111585268, 11.948099117170722, 8.223505451675221], 
"eval_len": [80, 24, 16, 21, 55, 36, 32, 29, 14, 12]}

 52%|█████▏    | 519998/1000000 [9:00:40<6:07:41, 21.76it/s]global step 520000, trans_decision ep_re 41.3233835371709

{"global_step": 520000, "eval_re": [29.098012877991934, 20.64932339920175, 
19.164065905829265, 26.997174268035785, 154.31895474804045, 14.21129432604076, 
66.3874982078914, 12.925571266370678, 51.9663258730719, 17.5156144992351], 
"eval_len": [35, 20, 26, 23, 98, 20, 89, 21, 44, 18]}

 53%|█████▎    | 529999/1000000 [9:10:52<6:06:33, 21.37it/s]global step 530000, trans_decision ep_re 26.558612932946385

{"global_step": 530000, "eval_re": [17.696122041990684, 10.88460286281422, 
55.94860651880582, 13.891230148843489, 31.96849083196936, 20.518704717790936, 
10.245698680842166, 11.417731737133176, 84.02303635042567, 8.991905438848352], 
"eval_len": [21, 13, 77, 15, 33, 19, 13, 13, 82, 11]}

 54%|█████▍    | 539999/1000000 [9:21:30<5:57:58, 21.42it/s]global step 540000, trans_decision ep_re 36.926732322761275

{"global_step": 540000, "eval_re": [179.49999744862296, 24.649949909925642, 
21.469683519358206, 19.778903450991912, 20.40532796021014, 10.456914330343716, 
41.29486868432862, 12.843611756600584, 9.07663093867815, 29.79143522855279], 
"eval_len": [105, 36, 22, 21, 23, 13, 35, 18, 12, 31]}

 55%|█████▍    | 549997/1000000 [9:32:00<5:51:13, 21.35it/s]global step 550000, trans_decision ep_re 33.96549081190509

{"global_step": 550000, "eval_re": [17.521270472289785, 39.14127492498761, 
25.84932604716904, 12.021590063772019, 31.433424412287422, 14.956345901760791, 
141.9578891454281, 12.104227859555042, 19.288533370655546, 25.381025921145547], 
"eval_len": [21, 41, 33, 14, 31, 19, 99, 17, 19, 34]}

 56%|█████▌    | 559998/1000000 [9:42:30<5:38:11, 21.68it/s]global step 560000, trans_decision ep_re 35.23410500019701

{"global_step": 560000, "eval_re": [14.301523484688515, 17.01605473925744, 
117.86376263251445, 17.22996953158506, 13.407797731349199, 26.37284919612571, 
18.441413359562045, 92.81904416097326, 22.080941445412936, 12.807693720501515], 
"eval_len": [15, 29, 74, 33, 30, 24, 21, 59, 21, 16]}

 57%|█████▋    | 569999/1000000 [9:52:42<5:35:44, 21.35it/s]global step 570000, trans_decision ep_re 44.94220147420331

{"global_step": 570000, "eval_re": [15.229572062414764, 83.68808928758494, 
15.578859924680986, 20.091655132921716, 8.985857592738228, 19.5753344716136, 
19.699069280343057, 15.209809070499583, 30.096725047820616, 221.26704287141553],
"eval_len": [23, 48, 22, 22, 11, 21, 19, 21, 27, 145]}

 58%|█████▊    | 579999/1000000 [10:03:20<5:26:18, 21.45it/s]global step 580000, trans_decision ep_re 42.2004314038353

{"global_step": 580000, "eval_re": [10.60552873171574, 88.55648439868267, 
113.63155442558121, 14.372130892994372, 15.954658247714589, 36.07968828224497, 
100.52095639564564, 19.566172427004638, 11.268562681427781, 11.448577555341444],
"eval_len": [15, 63, 129, 16, 20, 31, 87, 19, 22, 18]}

 59%|█████▉    | 589999/1000000 [10:13:50<5:21:43, 21.24it/s]global step 590000, trans_decision ep_re 31.474642564874593

{"global_step": 590000, "eval_re": [49.35408643150732, 10.806333749282448, 
24.938990301036988, 11.984459190181576, 118.10497526060308, 28.557361434037233, 
24.377736661790223, 12.759184450957791, 11.103744899550673, 22.759553269798587],
"eval_len": [44, 16, 24, 22, 62, 31, 23, 20, 12, 20]}

 60%|█████▉    | 599999/1000000 [10:24:20<5:11:15, 21.42it/s]global step 600000, trans_decision ep_re 59.48076623496181

{"global_step": 600000, "eval_re": [11.558397401358569, 11.203872097051939, 
18.328869714015635, 24.1420835984823, 11.933764478688166, 14.666950739307465, 
23.732173898165364, 319.14965547611365, 152.6130834185385, 7.478811527896546], 
"eval_len": [16, 13, 19, 27, 16, 30, 27, 230, 148, 13]}

 61%|██████    | 609999/1000000 [10:34:50<5:02:21, 21.50it/s]global step 610000, trans_decision ep_re 28.75284992298548

{"global_step": 610000, "eval_re": [11.685655888426203, 12.943213553383943, 
10.243569117722926, 19.655755442348354, 8.186882427046369, 159.97368005732466, 
31.01562157652265, 10.834921960623562, 8.082679191345445, 14.906520015110699], 
"eval_len": [14, 17, 14, 28, 11, 88, 39, 27, 13, 19]}

 62%|██████▏   | 619997/1000000 [10:45:20<4:55:23, 21.44it/s]global step 620000, trans_decision ep_re 17.68585748330262

{"global_step": 620000, "eval_re": [16.02007373676299, 23.33176457706196, 
11.954307085099277, 19.567896925665135, 24.56236361794911, 23.627812511471824, 
13.781282801992198, 10.881960185172828, 12.725949618273505, 20.40516377357734], 
"eval_len": [26, 23, 33, 22, 35, 27, 15, 13, 19, 20]}

 63%|██████▎   | 629998/1000000 [10:55:50<4:42:54, 21.80it/s]global step 630000, trans_decision ep_re 16.989066298988057

{"global_step": 630000, "eval_re": [9.82493203672468, 18.22224737483182, 
26.377778486904873, 12.847483375015395, 15.854074944226161, 13.747719369250555, 
34.10297838517094, 11.209718915372063, 8.57110247820685, 19.13262762417726], 
"eval_len": [12, 20, 27, 19, 20, 16, 38, 17, 34, 32]}

 64%|██████▍   | 639999/1000000 [11:06:20<4:38:37, 21.53it/s]global step 640000, trans_decision ep_re 59.64342910640018

{"global_step": 640000, "eval_re": [27.43729333294636, 24.617504985879865, 
18.18598544710378, 175.37599928430097, 78.5578771955245, 49.491518808213314, 
22.561608746445824, 21.130605044397022, 18.376259367142303, 160.69963885204774],
"eval_len": [23, 23, 38, 101, 57, 46, 32, 24, 20, 91]}

 65%|██████▍   | 649999/1000000 [11:16:31<4:31:04, 21.52it/s]global step 650000, trans_decision ep_re 23.79189329236541

{"global_step": 650000, "eval_re": [19.002528132050514, 19.108523794218645, 
29.099666704402654, 18.178942936386083, 13.069751962400685, 17.48309181021727, 
41.67251697282204, 33.8555549434686, 27.267310719361202, 19.181044948326402], 
"eval_len": [23, 25, 34, 21, 24, 23, 46, 34, 27, 29]}

 66%|██████▌   | 659999/1000000 [11:27:10<4:23:25, 21.51it/s]global step 660000, trans_decision ep_re 27.641493499626204

{"global_step": 660000, "eval_re": [16.162101174073037, 23.257482332626573, 
7.9849886817272555, 77.17038410889234, 10.401324391264955, 17.87178286704869, 
20.42017671889426, 54.00836678196869, 15.101369794654124, 34.03695814511212], 
"eval_len": [17, 26, 11, 52, 12, 20, 27, 57, 26, 42]}

 67%|██████▋   | 669998/1000000 [11:37:40<4:14:49, 21.58it/s]global step 670000, trans_decision ep_re 46.320552527498705

{"global_step": 670000, "eval_re": [30.962436014554466, 11.111934159796316, 
12.221017264840906, 165.65389033330587, 21.810190004834965, 23.74262008397971, 
70.8765285915892, 97.42305241659636, 15.960109794357095, 13.443746611132145], 
"eval_len": [35, 16, 15, 108, 24, 27, 50, 63, 21, 21]}

 68%|██████▊   | 679999/1000000 [11:47:53<4:08:52, 21.43it/s]global step 680000, trans_decision ep_re 30.298391825967986

{"global_step": 680000, "eval_re": [18.740686899909452, 16.23180542166531, 
11.052631826940791, 26.77487536507569, 25.42953280689262, 9.299816638877688, 
82.74240512822712, 17.779530034322367, 77.2223965010971, 17.71023763667169], 
"eval_len": [24, 18, 15, 35, 24, 20, 61, 23, 63, 17]}

 69%|██████▉   | 689999/1000000 [11:58:21<4:00:06, 21.52it/s]global step 690000, trans_decision ep_re 23.49877128147724

{"global_step": 690000, "eval_re": [28.64301067449205, 91.96334689399369, 
17.43795907603464, 10.797629849028727, 19.56580974093921, 17.70249582295226, 
10.388881642799966, 11.697450762325792, 7.967863688699967, 18.823264663506073], 
"eval_len": [32, 80, 17, 14, 22, 17, 18, 15, 14, 33]}

 70%|██████▉   | 699999/1000000 [12:09:00<3:52:17, 21.53it/s]global step 700000, trans_decision ep_re 15.564655363474511

{"global_step": 700000, "eval_re": [10.824670981599581, 13.378314433161329, 
11.816001466478715, 11.367990506067956, 20.959394510556933, 9.059351753046187, 
24.043251020203584, 20.038968838907547, 9.476238365267044, 24.682371759456224], 
"eval_len": [28, 20, 13, 13, 28, 23, 31, 20, 11, 25]}

 71%|███████   | 709998/1000000 [12:19:30<3:42:00, 21.77it/s]global step 710000, trans_decision ep_re 42.76107977686847

{"global_step": 710000, "eval_re": [90.85382232138457, 19.106178113781482, 
15.130242242970008, 19.743102414204362, 23.80676942709576, 81.5596756633455, 
100.67663248674724, 27.526752470207235, 38.27937013836479, 10.928252490583715], 
"eval_len": [61, 28, 31, 33, 21, 63, 80, 32, 47, 13]}

 72%|███████▏  | 719998/1000000 [12:30:00<3:36:52, 21.52it/s]global step 720000, trans_decision ep_re 57.867785759486296

{"global_step": 720000, "eval_re": [149.81729724884818, 22.557011072616362, 
19.11019732232416, 129.68099147644955, 12.313187002414786, 16.416026517665195, 
32.063470064298855, 132.22312866013, 50.72737343730392, 13.769174792812139], 
"eval_len": [110, 30, 21, 82, 20, 21, 30, 122, 38, 30]}

 73%|███████▎  | 729999/1000000 [12:40:12<3:29:45, 21.45it/s]global step 730000, trans_decision ep_re 32.40025438427473

{"global_step": 730000, "eval_re": [15.383814999983116, 10.734704654466874, 
18.04252116667298, 16.363669413959556, 36.57604216752936, 170.4102719549613, 
18.519407796918994, 14.394533842227323, 14.937137738858679, 8.640440107169194], 
"eval_len": [18, 15, 24, 20, 32, 103, 21, 33, 26, 13]}

 74%|███████▍  | 739999/1000000 [12:50:50<3:23:09, 21.33it/s]global step 740000, trans_decision ep_re 77.02258278522802

{"global_step": 740000, "eval_re": [15.102666110620083, 34.65200493849647, 
178.41902087874138, 100.05923107121787, 16.142524158900454, 78.71603008750458, 
8.607104354332087, 208.32599928156387, 20.574260075042456, 109.626986895861], 
"eval_len": [17, 31, 105, 76, 19, 56, 11, 110, 25, 59]}

 75%|███████▍  | 749999/1000000 [13:01:20<3:13:54, 21.49it/s]global step 750000, trans_decision ep_re 42.39820878318962

{"global_step": 750000, "eval_re": [11.660181286900686, 28.80098750437458, 
31.462677627516307, 10.44647550093301, 26.412773131623194, 23.60369503855133, 
18.69409882617834, 135.96156568489292, 125.39267666686045, 11.546956564065324], 
"eval_len": [15, 31, 61, 12, 29, 26, 19, 78, 90, 14]}

 76%|███████▌  | 759998/1000000 [13:11:50<3:05:53, 21.52it/s]global step 760000, trans_decision ep_re 29.260076284208054

{"global_step": 760000, "eval_re": [14.350721309646987, 19.652973157254436, 
14.47245392822642, 32.529522075130444, 15.40052450444792, 12.066282602949336, 
30.554837384272073, 7.574664041739023, 14.125093601953456, 131.87369023646045], 
"eval_len": [17, 19, 21, 30, 26, 19, 28, 17, 19, 76]}

 77%|███████▋  | 769998/1000000 [13:22:20<2:57:09, 21.64it/s]global step 770000, trans_decision ep_re 28.449929111686128

{"global_step": 770000, "eval_re": [29.241716304532368, 32.796620553713076, 
12.327381755991173, 28.868146370991965, 90.6137435057207, 14.59728748867982, 
15.520767708130258, 19.871571568666468, 23.012511351643866, 17.64954450879156], 
"eval_len": [35, 46, 16, 26, 51, 16, 26, 22, 24, 19]}

 78%|███████▊  | 779999/1000000 [13:32:32<2:51:14, 21.41it/s]global step 780000, trans_decision ep_re 28.19316431313478

{"global_step": 780000, "eval_re": [14.181141841871096, 122.20912089330973, 
13.384340123927021, 16.47206923806113, 19.27321299538147, 15.422323376226428, 
12.05051813645888, 42.49035554930272, 13.89088733895346, 12.55767363785586], 
"eval_len": [19, 87, 16, 20, 27, 23, 16, 32, 25, 16]}

 79%|███████▉  | 789999/1000000 [13:43:01<2:44:20, 21.30it/s]global step 790000, trans_decision ep_re 40.86807766708936

{"global_step": 790000, "eval_re": [15.455696769889006, 11.025989157918435, 
25.025626735728107, 12.48889858991701, 9.959979042431282, 110.08569674810212, 
12.499303196744275, 8.126696687305902, 7.9811294498714656, 196.03176029298595], 
"eval_len": [20, 15, 26, 25, 17, 84, 16, 16, 10, 124]}

 80%|███████▉  | 799999/1000000 [13:53:40<2:34:46, 21.54it/s]global step 800000, trans_decision ep_re 32.07772028953017

{"global_step": 800000, "eval_re": [21.017825174153312, 14.820289392297035, 
14.561622882815715, 24.832130804953586, 153.1951471162464, 12.445448032840574, 
26.858550367699205, 13.602700740615534, 25.70204467282968, 13.741443710850659], 
"eval_len": [24, 16, 17, 35, 97, 29, 35, 27, 29, 21]}

 81%|████████  | 809999/1000000 [14:04:10<2:26:58, 21.55it/s]global step 810000, trans_decision ep_re 54.849017019202186

{"global_step": 810000, "eval_re": [159.55971004985994, 17.142131109764478, 
184.5575068017698, 83.8736492772647, 12.87434797851757, 25.29172457349608, 
22.834339267960754, 10.435775338178233, 15.964556763796923, 15.956429031413391],
"eval_len": [78, 18, 91, 60, 14, 25, 28, 28, 20, 22]}

 82%|████████▏ | 819999/1000000 [14:14:40<2:19:22, 21.53it/s]global step 820000, trans_decision ep_re 27.6340225660299

{"global_step": 820000, "eval_re": [17.42194993577759, 37.38246306961154, 
27.005501826457014, 22.46598456807587, 20.9886115651733, 7.971886199587636, 
24.417356605492422, 26.36233329007389, 75.14206086555498, 17.182077734494708], 
"eval_len": [21, 45, 32, 24, 22, 12, 23, 25, 47, 25]}

 83%|████████▎ | 829999/1000000 [14:25:10<2:13:49, 21.17it/s]global step 830000, trans_decision ep_re 16.242800589499744

{"global_step": 830000, "eval_re": [13.09870978876122, 12.279577150511335, 
13.76838061338112, 10.15826844359394, 20.23996339628157, 10.444681414132633, 
17.80684196350687, 15.046971602717665, 17.132367651647108, 32.452243870463995], 
"eval_len": [21, 19, 16, 18, 29, 19, 18, 20, 19, 36]}

 84%|████████▍ | 839999/1000000 [14:35:40<2:04:18, 21.45it/s]global step 840000, trans_decision ep_re 39.18108930447478

{"global_step": 840000, "eval_re": [15.212959619408934, 24.42498671917556, 
95.01088551165259, 20.91954448271931, 24.966197908980043, 31.016797312930592, 
80.23185571325612, 12.579383340895912, 68.29037968097897, 19.15790275474978], 
"eval_len": [20, 31, 89, 26, 32, 33, 53, 19, 44, 20]}

 85%|████████▍ | 849999/1000000 [14:46:10<1:57:00, 21.36it/s]global step 850000, trans_decision ep_re 30.8498005610489

{"global_step": 850000, "eval_re": [7.802099728551494, 17.674888034980373, 
19.03999494357729, 15.082941482017121, 18.061747652814482, 18.997923483442502, 
19.8189951376846, 11.912960251300056, 11.137326672091149, 168.96912822402996], 
"eval_len": [14, 27, 20, 32, 21, 23, 29, 28, 14, 99]}

 86%|████████▌ | 859999/1000000 [14:56:41<1:49:16, 21.35it/s]global step 860000, trans_decision ep_re 37.75018757766857

{"global_step": 860000, "eval_re": [109.47350740088817, 16.7657631786002, 
12.150568204355416, 19.8910601274286, 110.81042642417412, 28.73570236300716, 
23.33615450171382, 16.802617073761034, 26.407413746297834, 13.128662756459388], 
"eval_len": [77, 18, 16, 28, 66, 31, 34, 19, 25, 16]}

 87%|████████▋ | 869998/1000000 [15:07:11<1:40:11, 21.63it/s]global step 870000, trans_decision ep_re 42.07338878550528

{"global_step": 870000, "eval_re": [12.160483477293521, 15.760688879227429, 
25.43405879469915, 88.02560468269459, 15.208920846018202, 15.723688932344189, 
129.3488564730961, 55.52960833915643, 29.130707079123084, 34.41127035140003], 
"eval_len": [14, 19, 33, 55, 17, 17, 77, 53, 42, 60]}

 88%|████████▊ | 879998/1000000 [15:17:41<1:31:56, 21.76it/s]global step 880000, trans_decision ep_re 20.951422617909312

{"global_step": 880000, "eval_re": [50.42930807711992, 16.48606604237259, 
16.883549838736126, 17.161497991304298, 42.82094068559293, 17.908203553375404, 
10.276257302981765, 9.415159380649332, 16.230019230023203, 11.903224076937558], 
"eval_len": [38, 18, 23, 18, 43, 25, 16, 14, 17, 31]}

 89%|████████▉ | 889999/1000000 [15:27:52<1:25:20, 21.48it/s]global step 890000, trans_decision ep_re 18.762391496592183

{"global_step": 890000, "eval_re": [12.208919655109897, 24.030696494796285, 
13.886970850898544, 11.816201365595207, 9.07957407049556, 11.565226887196694, 
17.15717549037259, 19.849789053823407, 41.35836262375124, 26.670998473882413], 
"eval_len": [26, 33, 21, 18, 13, 22, 32, 25, 34, 31]}

 90%|████████▉ | 899999/1000000 [15:38:31<1:17:27, 21.52it/s]global step 900000, trans_decision ep_re 30.170460737404007

{"global_step": 900000, "eval_re": [16.18954763438149, 13.281855347104639, 
33.38694786439594, 16.028404570716358, 11.19140467229671, 21.224310361911773, 
123.39917353525503, 12.038786372262697, 41.588975853517404, 13.375201162198032],
"eval_len": [17, 20, 29, 31, 15, 28, 73, 24, 52, 14]}

 91%|█████████ | 909999/1000000 [15:49:01<1:09:53, 21.46it/s]global step 910000, trans_decision ep_re 21.72268810400134

{"global_step": 910000, "eval_re": [11.913927845587791, 56.1026836521386, 
20.70781328403973, 15.551983419026515, 11.096811760512487, 27.358966660291284, 
29.944599315978344, 10.20654617404376, 19.746500302246034, 14.59704862614884], 
"eval_len": [19, 44, 24, 24, 16, 23, 25, 22, 29, 18]}

 92%|█████████▏| 919998/1000000 [15:59:31<1:01:43, 21.60it/s]global step 920000, trans_decision ep_re 21.25595711954929

{"global_step": 920000, "eval_re": [25.559979198971142, 24.147892124065535, 
11.50697091491915, 8.138785413402454, 20.175163953158595, 15.179799679098258, 
15.892543925565988, 41.52597923042466, 34.21580894632986, 16.21664780955726], 
"eval_len": [27, 36, 16, 11, 22, 16, 26, 32, 34, 18]}

 93%|█████████▎| 929999/1000000 [16:09:42<54:18, 21.48it/s]global step 930000, trans_decision ep_re 36.0386906589763

{"global_step": 930000, "eval_re": [23.993557790150163, 11.836115914891147, 
16.768003037285766, 21.149771826915625, 21.770370472664162, 18.520112970345576, 
12.152326817879288, 114.8748341916469, 90.22235041224876, 29.099463155735627], 
"eval_len": [24, 19, 20, 32, 25, 26, 32, 62, 58, 30]}

 94%|█████████▍| 939999/1000000 [16:20:11<47:10, 21.20it/s]global step 940000, trans_decision ep_re 34.164729700157054

{"global_step": 940000, "eval_re": [13.76329813987667, 23.977181901910875, 
183.3799348081032, 11.543576323748947, 13.822628598300588, 22.918698664602765, 
23.81394809327165, 11.351746030788295, 14.695593501059312, 22.380690939908302], 
"eval_len": [32, 24, 102, 28, 21, 21, 36, 17, 24, 26]}

 95%|█████████▍| 949999/1000000 [16:30:42<38:59, 21.38it/s]global step 950000, trans_decision ep_re 43.43923170323016

{"global_step": 950000, "eval_re": [23.140205913537788, 21.413756243134365, 
16.01489244333343, 9.029988769317074, 13.390629797602086, 75.6665635199449, 
18.845156402575583, 8.622108760199218, 167.91253172817508, 80.35648345448212], 
"eval_len": [26, 27, 21, 15, 15, 49, 21, 16, 112, 52]}

 96%|█████████▌| 959999/1000000 [16:41:13<31:23, 21.24it/s]global step 960000, trans_decision ep_re 17.552452561427675

{"global_step": 960000, "eval_re": [9.797498228379508, 30.319171177634452, 
18.053453325989217, 9.601610841796829, 12.203955198610075, 21.49530181445048, 
30.78669658840515, 10.291673225412161, 18.471361590936215, 14.503803622662646], 
"eval_len": [16, 28, 18, 35, 17, 20, 33, 33, 20, 20]}

 97%|█████████▋| 969998/1000000 [16:52:01<23:12, 21.54it/s]global step 970000, trans_decision ep_re 48.30161029185749

{"global_step": 970000, "eval_re": [12.984411075689497, 23.579359708667738, 
11.038926831682014, 107.02741276257315, 98.00449051483137, 69.2392754710604, 
27.256279678284873, 14.58505087018481, 107.14702006467421, 12.153875940926797], 
"eval_len": [20, 23, 13, 79, 80, 52, 46, 18, 77, 16]}

 98%|█████████▊| 979998/1000000 [17:02:31<15:22, 21.68it/s]global step 980000, trans_decision ep_re 40.530954554893455

{"global_step": 980000, "eval_re": [10.76882844758457, 70.85824835351828, 
12.524014402023198, 19.407938346731775, 10.385006110924273, 94.7807153724402, 
21.200994370802903, 29.479489103640507, 104.29861490262711, 31.60569613864183], 
"eval_len": [16, 66, 17, 24, 14, 69, 22, 32, 74, 35]}

 99%|█████████▉| 989998/1000000 [17:13:01<07:49, 21.29it/s]global step 990000, trans_decision ep_re 19.39092864555468

{"global_step": 990000, "eval_re": [13.420945035790565, 8.505801014176571, 
58.24990694363538, 21.600238085464973, 22.918373115348253, 19.36325446085277, 
12.73234020481406, 13.119000307302903, 12.857497118088714, 11.141930170072598], 
"eval_len": [21, 11, 65, 20, 24, 35, 20, 22, 27, 29]}

100%|█████████▉| 999999/1000000 [17:23:41<00:00, 21.33it/s]global step 1000000, trans_decision ep_re 62.37142874324718

{"global_step": 1000000, "eval_re": [187.01074345274978, 76.45621774856679, 
5.9520878445194505, 20.92002826211189, 20.79715994892542, 159.16455434706057, 
24.473842408763563, 9.941940887249523, 99.89389466852695, 19.103817863997893], 
"eval_len": [100, 54, 9, 22, 24, 93, 27, 15, 94, 31]}

100%|██████████| 1000000/1000000 [17:23:41<00:00, 15.97it/s]
