
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.15
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [04:21<10:40:56, 25.74it/s]global step 10000, trans_decision ep_re 37.082298230067025

{"global_step": 10000, "eval_re": [14.265759493256303, 15.996579254518359, 
14.900459447395761, 222.6717659825286, 19.614097862337168, 21.97568325517734, 
18.33349351429964, 18.79550948653776, 8.073672370796869, 16.19596163382244], 
"eval_len": [20, 22, 21, 114, 22, 21, 18, 21, 15, 19]}

  2%|▏         | 19998/1000000 [13:03<10:28:30, 25.99it/s]global step 20000, trans_decision ep_re 37.122778813656865

{"global_step": 20000, "eval_re": [12.564830951584156, 196.76723433189844, 
15.215819480395826, 13.668019034711355, 24.14188923851961, 41.703357927862996, 
22.95002758651012, 12.018357969661981, 13.695797609362879, 18.502454006061317], 
"eval_len": [15, 113, 16, 20, 24, 61, 29, 16, 18, 20]}

  3%|▎         | 29999/1000000 [22:00<10:38:31, 25.32it/s]global step 30000, trans_decision ep_re 40.76218785200159

{"global_step": 30000, "eval_re": [75.77680886286531, 13.251872797114993, 
114.48036504479572, 86.11374052610805, 23.736003144765565, 17.610273216314827, 
15.03744734170417, 11.77708538039204, 12.511918400062948, 37.32636380589226], 
"eval_len": [45, 15, 93, 52, 26, 22, 16, 15, 17, 57]}

  4%|▍         | 39998/1000000 [30:32<10:21:19, 25.75it/s]global step 40000, trans_decision ep_re 19.815438001903694

{"global_step": 40000, "eval_re": [11.33133042632619, 30.329180289902663, 
17.846536811559936, 17.986044184281376, 22.450252097763375, 12.68294638118609, 
23.775459915048174, 18.069629811957498, 17.23109279196064, 26.45190730905096], 
"eval_len": [17, 32, 21, 21, 21, 20, 30, 28, 20, 30]}

  5%|▍         | 49997/1000000 [39:30<10:18:56, 25.58it/s]global step 50000, trans_decision ep_re 54.70667088645412

{"global_step": 50000, "eval_re": [16.942579693073363, 14.983046183903713, 
14.369285811628874, 74.60672883652347, 46.47199531311926, 54.882398991243186, 
199.9973202603816, 96.92916756723844, 12.708065149795928, 15.176121057633335], 
"eval_len": [19, 20, 20, 54, 52, 54, 107, 71, 17, 23]}

  6%|▌         | 59999/1000000 [48:00<10:15:09, 25.47it/s]global step 60000, trans_decision ep_re 40.17857621424377

{"global_step": 60000, "eval_re": [13.154989172513078, 183.08913353048263, 
21.964756524580515, 14.142909553864166, 17.189249527925064, 91.24152208862681, 
13.746097891464787, 11.368504159367593, 13.984708622483119, 21.903891071129905],
"eval_len": [16, 120, 26, 23, 27, 74, 21, 25, 16, 26]}

  7%|▋         | 69997/1000000 [57:00<10:14:06, 25.24it/s]global step 70000, trans_decision ep_re 86.35137157738367

{"global_step": 70000, "eval_re": [22.308247165750252, 18.85996195711371, 
84.79755138826528, 63.729159043576296, 72.46458039105494, 35.88233188063088, 
253.75654244230276, 269.5481114996883, 27.927332098314984, 14.239897907139335], 
"eval_len": [28, 22, 69, 48, 71, 33, 196, 159, 29, 17]}

  8%|▊         | 79999/1000000 [1:05:40<10:05:25, 25.33it/s]global step 80000, trans_decision ep_re 50.8748046910027

{"global_step": 80000, "eval_re": [88.815140892306, 24.920766275570767, 
64.52813886561965, 15.562538566552615, 68.9296910863916, 22.78991236663843, 
16.39525830587052, 12.24165617163828, 61.93157489557295, 132.63336948386623], 
"eval_len": [59, 28, 85, 16, 89, 28, 18, 19, 48, 68]}

  9%|▉         | 89998/1000000 [1:14:14<9:46:23, 25.86it/s]global step 90000, trans_decision ep_re 42.661905317775556

{"global_step": 90000, "eval_re": [10.872707535991848, 70.06823941075976, 
17.446330256789434, 51.22713919327439, 123.77677383009753, 8.527763542424422, 
16.524055822646385, 101.09440107951427, 10.221555547127181, 16.86008695913033], 
"eval_len": [14, 63, 19, 59, 78, 13, 21, 67, 13, 18]}

 10%|▉         | 99999/1000000 [1:23:10<9:52:17, 25.33it/s]global step 100000, trans_decision ep_re 45.44962632052574

{"global_step": 100000, "eval_re": [13.726223507489768, 101.94571544218647, 
69.85986814752265, 73.02739574920298, 22.5899268331939, 19.563092082533565, 
19.92949673140225, 14.066922241235712, 98.5243379205279, 21.26328454996215], 
"eval_len": [20, 78, 48, 52, 20, 21, 30, 16, 64, 27]}

 11%|█         | 109999/1000000 [1:31:42<9:44:32, 25.38it/s]global step 110000, trans_decision ep_re 40.26717003918551

{"global_step": 110000, "eval_re": [81.45749173937737, 60.85151206574706, 
15.634687415241508, 29.255920519007343, 75.99989270883576, 16.572802976724173, 
12.060493327363943, 76.50241600405826, 11.211291485489156, 23.125192150010573], 
"eval_len": [46, 42, 22, 33, 71, 21, 18, 68, 20, 31]}

 12%|█▏        | 119997/1000000 [1:40:40<9:44:04, 25.11it/s]global step 120000, trans_decision ep_re 83.78726137113728

{"global_step": 120000, "eval_re": [56.90891668191092, 15.955925451840132, 
84.26447659778361, 92.81182634080554, 103.53732077500078, 142.0712888538067, 
112.30304801861155, 132.60167689754758, 80.54401151356879, 16.874122580497264], 
"eval_len": [60, 25, 62, 75, 74, 81, 66, 120, 63, 25]}

 13%|█▎        | 129998/1000000 [1:49:14<9:31:59, 25.35it/s]global step 130000, trans_decision ep_re 70.58475026808169

{"global_step": 130000, "eval_re": [18.087672462235346, 14.268859320592119, 
33.40558507202866, 130.09641873140458, 83.58631623726114, 19.797378653750098, 
140.15011309008725, 161.0994199050137, 29.618847088337898, 75.7368921201061], 
"eval_len": [23, 22, 32, 78, 60, 21, 122, 93, 36, 66]}

 14%|█▍        | 139999/1000000 [1:58:02<9:34:22, 24.95it/s]global step 140000, trans_decision ep_re 45.06317650239016

{"global_step": 140000, "eval_re": [11.2166392158401, 25.89950529996327, 
14.015702792791098, 33.08351988484876, 232.61373699182698, 49.14342576022505, 
22.10508798399189, 15.530594558116826, 32.007847943738646, 15.01570459255894], 
"eval_len": [15, 25, 18, 28, 129, 49, 21, 24, 32, 20]}

 15%|█▍        | 149999/1000000 [2:06:50<9:22:16, 25.20it/s]global step 150000, trans_decision ep_re 51.441433935141426

{"global_step": 150000, "eval_re": [187.3298557948337, 23.839110955844507, 
77.44790714461821, 17.57497882538275, 15.671283273018735, 27.34335106836526, 
25.741552239044612, 22.071628716441186, 20.13996314700209, 97.25470818686316], 
"eval_len": [100, 24, 58, 25, 21, 28, 23, 27, 22, 69]}

 16%|█▌        | 159999/1000000 [2:15:50<9:11:28, 25.39it/s]global step 160000, trans_decision ep_re 91.23862440806299

{"global_step": 160000, "eval_re": [15.045421444455396, 15.420223539252836, 
19.083582908227164, 140.95646981568916, 119.2124066238298, 12.237733684242396, 
264.23434494340563, 17.82407543470582, 166.96258631813785, 141.4093993686839], 
"eval_len": [23, 20, 24, 96, 84, 19, 121, 24, 101, 71]}

 17%|█▋        | 169997/1000000 [2:24:40<9:06:51, 25.30it/s]global step 170000, trans_decision ep_re 64.08820116079914

{"global_step": 170000, "eval_re": [109.51539433455925, 26.63423551538096, 
124.62919129121606, 15.382558790152824, 84.76266658833528, 27.334189803579914, 
25.51135698807969, 126.26691133137311, 23.696892198072614, 77.14861476724167], 
"eval_len": [83, 30, 112, 17, 55, 31, 28, 92, 27, 46]}

 18%|█▊        | 179998/1000000 [2:33:14<8:57:32, 25.42it/s]global step 180000, trans_decision ep_re 54.56204895894293

{"global_step": 180000, "eval_re": [21.525411110333835, 15.986834598572859, 
13.211527443178708, 67.97320798385115, 188.1120938246074, 36.01472500264093, 
13.968032722306484, 17.82993292893813, 148.08149048099236, 22.917233494007537], 
"eval_len": [26, 20, 17, 46, 122, 30, 17, 27, 101, 22]}

 19%|█▉        | 189998/1000000 [2:42:03<8:46:04, 25.66it/s]global step 190000, trans_decision ep_re 40.58634100838522

{"global_step": 190000, "eval_re": [12.445638697301659, 10.352156903175517, 
17.13939690517977, 120.28475423828664, 20.715412915476286, 19.61334965254629, 
27.771153807950032, 17.516737108271293, 20.72557918143573, 139.299230674229], 
"eval_len": [17, 13, 21, 87, 22, 28, 26, 23, 22, 88]}

 20%|█▉        | 199999/1000000 [2:51:00<8:41:48, 25.55it/s]global step 200000, trans_decision ep_re 55.89290373694305

{"global_step": 200000, "eval_re": [74.21203203798346, 18.4479083126092, 
29.960090125665417, 26.864648903980903, 82.51960785185466, 152.72221062315091, 
116.25882191084132, 8.818115914684476, 26.082243973978354, 23.043357714681797], 
"eval_len": [60, 18, 37, 33, 60, 86, 71, 12, 26, 31]}

 21%|██        | 209999/1000000 [2:59:32<8:35:04, 25.56it/s]global step 210000, trans_decision ep_re 73.40478425940478

{"global_step": 210000, "eval_re": [12.933541742410284, 19.700533650382837, 
313.0499051878993, 137.81112347223274, 10.580615774683329, 116.29364834727015, 
21.318797224380653, 74.72126326306132, 12.168619040355411, 15.469794891371754], 
"eval_len": [17, 22, 172, 93, 17, 72, 24, 59, 14, 24]}

 22%|██▏       | 219999/1000000 [3:08:30<8:31:46, 25.40it/s]global step 220000, trans_decision ep_re 39.85104399599495

{"global_step": 220000, "eval_re": [67.343499721564, 11.64319493991076, 
22.48039884978784, 20.476049652150635, 34.006506996555785, 12.446735368201153, 
105.7666672585373, 17.324522898248922, 11.879509170031392, 95.14335510496176], 
"eval_len": [44, 16, 22, 25, 39, 15, 81, 21, 16, 67]}

 23%|██▎       | 229999/1000000 [3:17:02<8:26:43, 25.33it/s]global step 230000, trans_decision ep_re 127.3615490229881

{"global_step": 230000, "eval_re": [88.38563009469073, 132.53717217544434, 
19.41688755063927, 478.2824614154081, 85.29567017828319, 143.85463893140115, 
88.4241686994631, 102.1633305015046, 12.150294556519516, 123.10523612652692], 
"eval_len": [86, 82, 26, 159, 68, 87, 70, 74, 19, 91]}

 24%|██▍       | 239999/1000000 [3:26:00<8:16:07, 25.53it/s]global step 240000, trans_decision ep_re 52.0951477388635

{"global_step": 240000, "eval_re": [102.86635916880807, 12.881552533688398, 
8.605102319369832, 23.72030633006112, 24.949314594119183, 68.44511265620825, 
120.71680595886941, 121.05128250783554, 20.075323546616755, 17.64031777305848], 
"eval_len": [77, 19, 11, 27, 23, 53, 70, 87, 24, 21]}

 25%|██▍       | 249998/1000000 [3:34:33<8:05:19, 25.76it/s]global step 250000, trans_decision ep_re 63.77956408196426

{"global_step": 250000, "eval_re": [91.4697347846997, 17.316569151655784, 
82.16011133939071, 199.56332437023067, 72.78543199475466, 23.780025953318354, 
15.278918891499615, 15.871078485369654, 105.0576112954036, 14.512834553319909], 
"eval_len": [65, 22, 58, 124, 62, 26, 24, 18, 72, 19]}

 26%|██▌       | 259999/1000000 [3:43:30<8:09:02, 25.22it/s]global step 260000, trans_decision ep_re 63.74563967679264

{"global_step": 260000, "eval_re": [81.06024789572498, 150.11776911221142, 
36.710086513578865, 28.077027259826444, 85.52347416643016, 97.73057631250904, 
26.27776722737639, 20.420453720447643, 24.707953821200935, 86.83104073862053], 
"eval_len": [54, 95, 32, 29, 68, 60, 24, 22, 27, 62]}

 27%|██▋       | 269998/1000000 [3:52:03<7:55:15, 25.60it/s]global step 270000, trans_decision ep_re 65.6105951871338

{"global_step": 270000, "eval_re": [82.30368100554016, 165.86956162077269, 
15.728797733552511, 19.993616457293232, 131.75823224976375, 24.990296926482127, 
9.44403262289621, 178.32292778376052, 15.881830099462926, 11.812975371813911], 
"eval_len": [53, 118, 19, 22, 106, 27, 14, 115, 21, 16]}

 28%|██▊       | 279999/1000000 [4:01:00<7:57:26, 25.13it/s]global step 280000, trans_decision ep_re 24.196936417795456

{"global_step": 280000, "eval_re": [15.52660265473279, 25.022595890949333, 
14.227290397773457, 20.969131467057107, 23.65285254622955, 13.428570897552232, 
11.942555417525272, 9.591658739758813, 91.59238218556845, 16.01572398080755], 
"eval_len": [18, 36, 19, 29, 21, 17, 18, 14, 54, 21]}

 29%|██▉       | 289998/1000000 [4:09:33<7:38:52, 25.79it/s]global step 290000, trans_decision ep_re 42.23913109074569

{"global_step": 290000, "eval_re": [17.528108531051995, 108.65812142721947, 
12.909978821493876, 17.349220384487975, 94.39134633944022, 23.418567723649666, 
21.698195467961533, 19.519018054446818, 14.511876401544482, 92.4068777561608], 
"eval_len": [20, 83, 15, 23, 80, 28, 29, 20, 19, 86]}

 30%|██▉       | 299999/1000000 [4:18:30<7:40:08, 25.35it/s]global step 300000, trans_decision ep_re 79.51743588649728

{"global_step": 300000, "eval_re": [13.670712283098945, 124.16674149204195, 
19.72199468353971, 15.267660394911639, 14.921602833157559, 157.72702200363327, 
18.416499419474047, 23.501093170349176, 264.5683199672905, 143.2127126174761], 
"eval_len": [18, 78, 20, 20, 20, 112, 20, 27, 143, 71]}

 31%|███       | 309998/1000000 [4:27:03<7:30:10, 25.55it/s]global step 310000, trans_decision ep_re 66.61963065100701

{"global_step": 310000, "eval_re": [16.457423888777328, 17.628408636629242, 
141.1200300978686, 18.54991883243543, 18.891649547499334, 25.56080106086233, 
157.68684604459793, 12.88909153518292, 110.6757075687021, 146.73642929751483], 
"eval_len": [24, 25, 99, 24, 25, 25, 93, 18, 72, 89]}

 32%|███▏      | 319999/1000000 [4:36:00<7:33:38, 24.98it/s]global step 320000, trans_decision ep_re 150.74485632095988

{"global_step": 320000, "eval_re": [24.44355525734091, 190.74117266915007, 
234.3993180735053, 327.9469305109264, 36.095860407904375, 173.3184747860428, 
35.76568175151168, 180.61192150930188, 203.32085685837026, 100.80479138554523], 
"eval_len": [32, 102, 128, 191, 34, 149, 30, 91, 123, 59]}

 33%|███▎      | 329997/1000000 [4:44:50<7:22:57, 25.21it/s]global step 330000, trans_decision ep_re 33.424026385270416

{"global_step": 330000, "eval_re": [16.279245712568397, 17.714622342496472, 
15.895312352988514, 17.128121718393096, 21.97057009834718, 13.603432024812728, 
102.97905211825625, 14.359158598450191, 99.77115116794772, 14.53959771844359], 
"eval_len": [17, 20, 30, 18, 26, 21, 70, 20, 74, 21]}

 34%|███▍      | 339999/1000000 [4:53:21<7:22:25, 24.86it/s]global step 340000, trans_decision ep_re 63.349246309193894

{"global_step": 340000, "eval_re": [14.300668275977936, 103.85870967452324, 
21.25727354972206, 231.67306407751656, 18.188501821616534, 138.26454957384723, 
11.012773187505637, 43.88467839104018, 18.89207504366698, 32.160169496522485], 
"eval_len": [24, 71, 34, 110, 18, 77, 13, 33, 25, 30]}

 35%|███▍      | 349999/1000000 [5:02:20<7:14:45, 24.92it/s]global step 350000, trans_decision ep_re 126.20758159521829

{"global_step": 350000, "eval_re": [100.58016386672112, 149.4591036016222, 
277.2787412772643, 142.00281324638132, 87.18541487609352, 16.955545030090594, 
253.4131875830606, 198.21826373247265, 13.852373748299458, 23.130208990177177], 
"eval_len": [65, 124, 179, 79, 68, 25, 124, 99, 23, 25]}

 36%|███▌      | 359997/1000000 [5:11:10<7:06:20, 25.02it/s]global step 360000, trans_decision ep_re 68.68367267159043

{"global_step": 360000, "eval_re": [104.21374499430713, 15.601987598413702, 
122.48828189996223, 147.59374353681196, 121.58967556410197, 22.394504987421648, 
100.30911466673827, 19.460744864762116, 16.15557122159797, 17.02935738178737], 
"eval_len": [79, 23, 71, 90, 71, 24, 81, 23, 23, 19]}

 37%|███▋      | 369998/1000000 [5:20:00<6:58:15, 25.10it/s]global step 370000, trans_decision ep_re 65.59364969509275

{"global_step": 370000, "eval_re": [137.1106723084999, 21.294295321888583, 
114.08975766187908, 24.899445172212804, 20.029031190476488, 21.251530182458335, 
99.54936744521508, 64.85533202290176, 127.51593342432001, 25.341132221075565], 
"eval_len": [91, 24, 79, 25, 27, 20, 67, 44, 91, 26]}

 38%|███▊      | 379999/1000000 [5:28:31<6:57:25, 24.76it/s]global step 380000, trans_decision ep_re 34.872710117219434

{"global_step": 380000, "eval_re": [107.32029985441876, 13.795471488064106, 
13.983541479541636, 10.709023586487373, 16.57120286586642, 15.358809573023983, 
29.526207017330613, 100.1953037627107, 18.6787355934671, 22.58850595128363], 
"eval_len": [68, 17, 20, 15, 23, 18, 31, 65, 28, 28]}

 39%|███▉      | 389999/1000000 [5:37:30<6:49:44, 24.81it/s]global step 390000, trans_decision ep_re 33.518705629126615

{"global_step": 390000, "eval_re": [15.7723204902579, 31.138675539444417, 
16.972012696116703, 18.680460992028426, 21.600333672661126, 22.569286069253213, 
16.474273513589704, 22.252760031755574, 149.503443831442, 20.22348945471701], 
"eval_len": [21, 28, 21, 30, 25, 28, 25, 22, 94, 26]}

 40%|███▉      | 399997/1000000 [5:46:04<6:32:52, 25.45it/s]global step 400000, trans_decision ep_re 74.45483713884758

{"global_step": 400000, "eval_re": [20.249551997090045, 20.976537350309485, 
32.2991664050374, 148.8733738910813, 20.61411137991772, 115.77723092361421, 
11.263898391405162, 12.20425225301143, 78.6934786125648, 283.5967701844441], 
"eval_len": [28, 29, 35, 97, 29, 90, 15, 16, 62, 170]}

 41%|████      | 409999/1000000 [5:54:52<6:28:53, 25.29it/s]global step 410000, trans_decision ep_re 84.45618049703238

{"global_step": 410000, "eval_re": [68.76911839858522, 146.35421770021668, 
48.45853417071941, 13.783874333452367, 169.11102234441424, 84.40093334742792, 
23.48419285435162, 247.19696536043065, 26.631430624936385, 16.371515835789403], 
"eval_len": [62, 94, 48, 17, 98, 59, 26, 129, 28, 21]}

 42%|████▏     | 419999/1000000 [6:03:40<6:18:18, 25.55it/s]global step 420000, trans_decision ep_re 31.39629673187678

{"global_step": 420000, "eval_re": [93.51675106148286, 15.097670555595448, 
28.236167620145764, 11.451445394282786, 23.95368267578396, 35.76069606695561, 
16.18784666779193, 12.838810393654052, 65.71921108018537, 11.200685802890085], 
"eval_len": [61, 26, 31, 14, 29, 37, 18, 18, 61, 16]}

 43%|████▎     | 429998/1000000 [6:12:24<6:14:12, 25.39it/s]global step 430000, trans_decision ep_re 28.738874583060998

{"global_step": 430000, "eval_re": [22.206430278894295, 14.56515312489201, 
21.963103737605774, 15.396033309610614, 15.617396630758492, 11.469633083802172, 
37.47337163588841, 24.728776331187337, 92.62548284092601, 31.343364857044904], 
"eval_len": [26, 18, 28, 19, 18, 14, 35, 25, 69, 28]}

 44%|████▍     | 439999/1000000 [6:21:20<6:08:58, 25.30it/s]global step 440000, trans_decision ep_re 77.49062572041888

{"global_step": 440000, "eval_re": [182.49468655943826, 71.43591652440891, 
148.0704745578666, 22.12677703864359, 20.096312744897507, 52.57186227928983, 
17.19821992338868, 16.47777283898549, 134.50971683086155, 109.92451790640847], 
"eval_len": [92, 48, 135, 46, 29, 86, 18, 27, 92, 100]}

 45%|████▍     | 449998/1000000 [6:29:54<5:59:25, 25.50it/s]global step 450000, trans_decision ep_re 58.325195922571766

{"global_step": 450000, "eval_re": [165.59208496254564, 11.192751842532427, 
24.923731716655986, 103.54467156952244, 22.198661053349927, 32.57360134230126, 
156.58480443273646, 24.820933471386358, 20.245856219377504, 21.574862615309627],
"eval_len": [89, 23, 26, 64, 25, 28, 90, 28, 19, 28]}

 46%|████▌     | 459999/1000000 [6:38:50<5:57:23, 25.18it/s]global step 460000, trans_decision ep_re 53.088689717001685

{"global_step": 460000, "eval_re": [22.40759641794725, 159.18189340613498, 
70.76266287524756, 15.213644944308918, 91.2360768958152, 15.742504051481118, 
30.846839389289713, 11.043542867673489, 26.293750273676572, 88.15838604844198], 
"eval_len": [27, 100, 55, 19, 76, 20, 30, 13, 28, 56]}

 47%|████▋     | 469998/1000000 [6:47:24<5:42:49, 25.77it/s]global step 470000, trans_decision ep_re 45.180487028729765

{"global_step": 470000, "eval_re": [20.075413815862614, 27.5835614677928, 
23.176573777368297, 63.00744318717905, 60.37151577330322, 23.16200154546755, 
25.397624337703654, 144.09782109716218, 32.62435036169846, 32.30856492375977], 
"eval_len": [28, 26, 27, 59, 57, 23, 30, 89, 32, 28]}

 48%|████▊     | 479999/1000000 [6:56:20<5:40:53, 25.42it/s]global step 480000, trans_decision ep_re 38.764313076695366

{"global_step": 480000, "eval_re": [22.539319735088252, 20.353990267818194, 
26.65813633210412, 33.23017576474833, 15.359165157161176, 49.43598653277924, 
20.43378003556926, 18.248142725820564, 48.11158328002859, 133.27285093583595], 
"eval_len": [24, 31, 30, 30, 27, 53, 28, 28, 48, 98]}

 49%|████▉     | 489998/1000000 [7:04:54<5:30:17, 25.74it/s]global step 490000, trans_decision ep_re 79.68741513949792

{"global_step": 490000, "eval_re": [245.54118660493165, 53.662758822965, 
257.8759176778269, 51.32185898546621, 31.658819321119452, 26.00102658675569, 
65.81072621780795, 19.824088340731983, 21.512814684080038, 23.664954153294325], 
"eval_len": [127, 51, 144, 47, 29, 29, 58, 23, 23, 28]}

 50%|████▉     | 499999/1000000 [7:13:50<5:28:56, 25.33it/s]global step 500000, trans_decision ep_re 30.75497650558639

{"global_step": 500000, "eval_re": [17.97711541885061, 10.770784446171556, 
144.44757332096444, 19.201392702753278, 12.338928248450488, 17.01909052435934, 
26.466273920359534, 16.001705164154895, 30.372003444384667, 12.954897865415083],
"eval_len": [21, 15, 80, 28, 15, 20, 28, 22, 26, 22]}

 51%|█████     | 509998/1000000 [7:22:23<5:17:33, 25.72it/s]global step 510000, trans_decision ep_re 68.89411309726735

{"global_step": 510000, "eval_re": [19.554050781621886, 13.49476684704616, 
194.02976018380306, 14.851580040411308, 94.36110952730122, 25.507777289636806, 
100.75937934821529, 148.79680182752918, 67.26065406017416, 10.325251066934378], 
"eval_len": [24, 24, 111, 26, 58, 28, 69, 134, 76, 13]}

 52%|█████▏    | 519999/1000000 [7:31:20<5:17:11, 25.22it/s]global step 520000, trans_decision ep_re 70.44755587997518

{"global_step": 520000, "eval_re": [51.135072406712496, 182.97953139268964, 
22.204890066231815, 112.2865283291755, 18.677233908858625, 94.33014020794356, 
82.990964282237, 21.531934255818914, 95.34966380275067, 22.98960014733356], 
"eval_len": [48, 100, 22, 79, 22, 56, 67, 27, 109, 32]}

 53%|█████▎    | 529998/1000000 [7:39:53<5:07:02, 25.51it/s]global step 530000, trans_decision ep_re 49.135727812877974

{"global_step": 530000, "eval_re": [12.009268761900662, 28.313953146834532, 
97.07745704393497, 107.95559844677673, 18.779943751455416, 21.55437971778347, 
138.13379731323874, 14.479725944600803, 41.327919896791116, 11.725234105463256],
"eval_len": [18, 37, 64, 87, 20, 32, 88, 23, 38, 16]}

 54%|█████▍    | 539999/1000000 [7:48:50<5:02:51, 25.31it/s]global step 540000, trans_decision ep_re 78.06345325257335

{"global_step": 540000, "eval_re": [18.730641522752713, 107.90939805617995, 
15.720684730257295, 142.7719467493767, 106.62376980319534, 79.59340255763026, 
115.06355080781616, 16.104288489983453, 153.64707930827052, 24.469770500271224],
"eval_len": [25, 74, 20, 87, 79, 54, 71, 19, 81, 28]}

 55%|█████▍    | 549998/1000000 [7:57:23<4:52:38, 25.63it/s]global step 550000, trans_decision ep_re 32.96163214581886

{"global_step": 550000, "eval_re": [16.9220065717026, 41.139240594671875, 
14.010268700712972, 81.20639017645905, 10.586634971905605, 15.212442773318811, 
15.925481025622863, 21.095811286665953, 46.79549300454688, 66.72255235258203], 
"eval_len": [27, 43, 21, 54, 15, 18, 29, 26, 46, 47]}

 56%|█████▌    | 559999/1000000 [8:06:20<4:48:24, 25.43it/s]global step 560000, trans_decision ep_re 41.07793199022336

{"global_step": 560000, "eval_re": [28.974731308819386, 81.63725341419146, 
16.618172531819837, 24.29172720016669, 14.772535451542215, 120.78347544732975, 
19.823534367025683, 75.08927596816794, 9.7627715089052, 19.02584270426546], 
"eval_len": [38, 51, 18, 26, 20, 80, 25, 59, 13, 26]}

 57%|█████▋    | 569998/1000000 [8:14:54<4:39:14, 25.66it/s]global step 570000, trans_decision ep_re 38.445531097657934

{"global_step": 570000, "eval_re": [26.633693373681478, 22.273980008177976, 
18.22763277437643, 22.139522690318998, 73.57634675523681, 16.14576950619458, 
16.927755305451367, 134.26599720708177, 27.03067931254092, 27.23393404351904], 
"eval_len": [29, 28, 24, 27, 63, 20, 26, 70, 27, 26]}

 58%|█████▊    | 579999/1000000 [8:23:50<4:35:38, 25.40it/s]global step 580000, trans_decision ep_re 26.94036687828118

{"global_step": 580000, "eval_re": [17.892401776383128, 32.24685825465158, 
26.49417046292923, 12.203610232318914, 21.33663257523464, 68.08142615361109, 
18.81877162434474, 31.68032482350412, 23.318565669939634, 17.33090720989473], 
"eval_len": [22, 32, 25, 18, 23, 56, 30, 31, 27, 21]}

 59%|█████▉    | 589998/1000000 [8:32:25<4:29:51, 25.32it/s]global step 590000, trans_decision ep_re 52.04988830260852

{"global_step": 590000, "eval_re": [17.569357684544812, 14.889552940803219, 
53.93398877398699, 20.029629885766287, 20.717053007153684, 68.57083496862649, 
17.93313259929451, 278.7399389529687, 17.11575865316527, 10.999635559775255], 
"eval_len": [18, 25, 40, 28, 30, 93, 22, 133, 19, 14]}

 60%|█████▉    | 599999/1000000 [8:41:13<4:26:05, 25.05it/s]global step 600000, trans_decision ep_re 39.98987901240078

{"global_step": 600000, "eval_re": [16.955567579078654, 23.00461976792001, 
52.094242669293806, 75.26423870644007, 10.735019002927668, 114.52906167192847, 
49.29250244396227, 18.30931045230934, 11.456711676119408, 28.257516154028064], 
"eval_len": [28, 25, 55, 49, 14, 65, 43, 26, 15, 29]}

 61%|██████    | 609997/1000000 [8:50:10<4:23:29, 24.67it/s]global step 610000, trans_decision ep_re 39.94578401599544

{"global_step": 610000, "eval_re": [22.664295692253997, 17.665588220701594, 
14.192910685807394, 22.41643658060809, 19.91762464050452, 35.586035177451514, 
45.115836384647835, 175.67066575501354, 23.640803475396275, 22.587643547569705],
"eval_len": [25, 18, 15, 25, 20, 39, 55, 100, 24, 30]}

 62%|██████▏   | 619998/1000000 [8:59:00<4:10:18, 25.30it/s]global step 620000, trans_decision ep_re 54.70209921332438

{"global_step": 620000, "eval_re": [176.77982560282115, 17.603780026594723, 
51.25826284825971, 23.580870628938083, 80.07350178112043, 32.45642332123529, 
105.19063381968131, 14.137121589715768, 24.758685978237384, 21.181886536639883],
"eval_len": [96, 22, 66, 25, 57, 34, 72, 15, 23, 27]}

 63%|██████▎   | 629999/1000000 [9:07:32<4:09:20, 24.73it/s]global step 630000, trans_decision ep_re 38.99387162328289

{"global_step": 630000, "eval_re": [23.007546243917464, 19.54074739457709, 
93.3620893685133, 115.2588785994327, 14.109359250897512, 14.90609700556184, 
42.75809059052907, 29.010749566973704, 17.058882972405875, 20.926275240020413], 
"eval_len": [27, 29, 55, 71, 18, 19, 52, 29, 18, 32]}

 64%|██████▍   | 639999/1000000 [9:16:20<3:57:51, 25.22it/s]global step 640000, trans_decision ep_re 24.54146778254766

{"global_step": 640000, "eval_re": [13.354095443344875, 46.03918879327801, 
24.373732218729256, 17.35334587137499, 28.83877682364246, 17.405866699426536, 
39.62756639676284, 11.842215004213156, 14.419241150287704, 32.16064942441675], 
"eval_len": [17, 41, 26, 28, 29, 22, 39, 20, 25, 31]}

 65%|██████▍   | 649998/1000000 [9:25:04<3:48:56, 25.48it/s]global step 650000, trans_decision ep_re 39.385509148910415

{"global_step": 650000, "eval_re": [19.14444396601029, 141.7351439531545, 
26.39582172124838, 31.487159994948964, 16.019526466196975, 14.153493778267439, 
77.05618494509473, 27.531142873622258, 16.82742149365348, 23.504752296907085], 
"eval_len": [22, 100, 26, 29, 23, 23, 53, 31, 22, 28]}

 66%|██████▌   | 659999/1000000 [9:33:52<3:44:54, 25.20it/s]global step 660000, trans_decision ep_re 56.59504875429136

{"global_step": 660000, "eval_re": [309.3753632430773, 10.534168460805828, 
18.471751748729872, 17.975596312102713, 14.804530857854298, 21.49806772215917, 
31.784187744889042, 101.47445564186174, 23.688256581145623, 16.344109230288016],
"eval_len": [144, 14, 28, 24, 26, 27, 27, 64, 39, 18]}

 67%|██████▋   | 669999/1000000 [9:42:50<3:37:16, 25.31it/s]global step 670000, trans_decision ep_re 31.943866453887814

{"global_step": 670000, "eval_re": [21.78813755172233, 22.08458481462419, 
28.889773109527166, 29.205461478870777, 12.086283117507984, 35.07245531608959, 
48.25006813307733, 13.758345628495803, 16.070134583581726, 92.23342080538124], 
"eval_len": [21, 26, 27, 27, 15, 34, 40, 18, 17, 81]}

 68%|██████▊   | 679998/1000000 [9:51:24<3:29:34, 25.45it/s]global step 680000, trans_decision ep_re 37.94434052909926

{"global_step": 680000, "eval_re": [37.022604958686806, 31.38204251964985, 
15.17239364411768, 18.051935458143358, 16.697235900968334, 18.82745807045025, 
83.0930029664596, 21.95232906317322, 112.19361547474873, 25.05078723459476], 
"eval_len": [36, 32, 28, 20, 26, 21, 62, 27, 72, 27]}

 69%|██████▉   | 689999/1000000 [10:00:20<3:25:21, 25.16it/s]global step 690000, trans_decision ep_re 62.406374575472896

{"global_step": 690000, "eval_re": [15.920157180235817, 20.979644575143016, 
18.120099697910494, 26.965988422054718, 277.34130577145, 15.136158175573438, 
74.24054709531617, 73.56289939407945, 32.16960788223668, 69.62733756072916], 
"eval_len": [25, 21, 24, 27, 143, 17, 57, 71, 30, 47]}

 70%|██████▉   | 699997/1000000 [10:08:55<3:17:22, 25.33it/s]global step 700000, trans_decision ep_re 42.098906386166234

{"global_step": 700000, "eval_re": [12.547408943792119, 13.244972790966214, 
89.65923152074707, 67.91837199517738, 22.52260392801089, 19.588434761443008, 
23.25576877499708, 128.22945762172165, 31.442684939976957, 12.58012858482996], 
"eval_len": [16, 26, 81, 46, 22, 25, 24, 84, 35, 19]}

 71%|███████   | 709999/1000000 [10:17:50<3:10:01, 25.44it/s]global step 710000, trans_decision ep_re 55.701491709673384

{"global_step": 710000, "eval_re": [20.420014255870033, 132.55663378483166, 
25.118007261451275, 85.0424311879272, 16.850713300996393, 11.681525897644413, 
18.861760406382857, 23.08541053143108, 126.0387959816269, 97.35962448857198], 
"eval_len": [21, 87, 26, 63, 20, 16, 24, 31, 108, 71]}

 72%|███████▏  | 719998/1000000 [10:26:24<3:02:58, 25.50it/s]global step 720000, trans_decision ep_re 41.13139177933409

{"global_step": 720000, "eval_re": [16.58950243459341, 16.982973253640584, 
15.358336691294653, 90.78205106073466, 72.0325679134754, 24.017622527707818, 
80.9107729914024, 18.93884600604498, 52.207839027370134, 23.493405887076786], 
"eval_len": [17, 25, 21, 57, 76, 28, 63, 25, 75, 29]}

 73%|███████▎  | 729999/1000000 [10:35:20<2:57:30, 25.35it/s]global step 730000, trans_decision ep_re 76.2991018963457

{"global_step": 730000, "eval_re": [122.73442906873122, 20.038128726621707, 
21.34933319619093, 13.587020795976816, 15.901646452837479, 18.449614620090223, 
133.2590588896027, 124.35134475305843, 242.3963494158778, 50.92409304446974], 
"eval_len": [89, 28, 26, 21, 28, 22, 99, 81, 117, 42]}

 74%|███████▍  | 739998/1000000 [10:43:53<2:48:44, 25.68it/s]global step 740000, trans_decision ep_re 49.098945873847136

{"global_step": 740000, "eval_re": [186.863699061061, 17.981513257590702, 
25.477852815924276, 29.51572026668577, 14.06807648392126, 114.98626822199955, 
33.60937575928846, 38.249299855537124, 20.44745014800284, 9.790202868460389], 
"eval_len": [100, 21, 27, 28, 16, 84, 32, 36, 20, 15]}

 75%|███████▍  | 749999/1000000 [10:52:50<2:45:22, 25.19it/s]global step 750000, trans_decision ep_re 44.960846585175744

{"global_step": 750000, "eval_re": [22.087112306183773, 36.02006084660291, 
103.33822030623703, 12.986444186303189, 20.060738591341273, 20.986062298596625, 
17.314907924850537, 23.144119033268225, 97.11580337254189, 96.55499698583195], 
"eval_len": [29, 38, 97, 17, 23, 27, 24, 32, 62, 96]}

 76%|███████▌  | 759999/1000000 [11:01:22<2:37:59, 25.32it/s]global step 760000, trans_decision ep_re 20.614034639050306

{"global_step": 760000, "eval_re": [18.70813656787303, 15.78015068395407, 
27.212873193732747, 17.335167880071232, 28.999527801133592, 22.278520833804624, 
15.817665740288001, 30.14434033599078, 10.573497974814758, 19.29046537884021], 
"eval_len": [22, 28, 28, 18, 29, 30, 21, 30, 28, 29]}

 77%|███████▋  | 769997/1000000 [11:10:20<2:29:52, 25.58it/s]global step 770000, trans_decision ep_re 38.55713549801591

{"global_step": 770000, "eval_re": [19.493573321072194, 24.201868055146413, 
12.144150364068539, 24.447403073254687, 14.85566965854345, 19.869792591428148, 
21.016749634445134, 143.78899936026008, 30.88326441275058, 74.86988450918989], 
"eval_len": [22, 31, 15, 31, 17, 24, 27, 99, 32, 61]}

 78%|███████▊  | 779998/1000000 [11:18:55<2:25:11, 25.25it/s]global step 780000, trans_decision ep_re 35.76291840542528

{"global_step": 780000, "eval_re": [15.141559954459051, 15.567017159404458, 
17.513907290823457, 18.43957610826736, 22.611890916851557, 15.521465647932454, 
10.94845598756605, 187.60964478136387, 24.33348115674536, 29.94218505083915], 
"eval_len": [19, 18, 21, 27, 29, 18, 16, 108, 22, 27]}

 79%|███████▉  | 789998/1000000 [11:27:43<2:19:27, 25.10it/s]global step 790000, trans_decision ep_re 94.52525465181918

{"global_step": 790000, "eval_re": [54.61375013545789, 69.75452959229582, 
20.418448277711462, 260.27999900704054, 15.89178334644849, 85.97580066118516, 
110.68169352757107, 20.061562259204003, 170.85029693248055, 136.72468277879688],
"eval_len": [66, 51, 20, 133, 17, 68, 72, 29, 96, 77]}

 80%|███████▉  | 799999/1000000 [11:36:32<2:13:11, 25.03it/s]global step 800000, trans_decision ep_re 24.366141922343083

{"global_step": 800000, "eval_re": [47.10002551519266, 25.270804729574557, 
16.071027339262184, 24.895575036645248, 15.263720550177652, 23.573756311751314, 
16.709040612537706, 23.293771761848294, 30.703028584219545, 20.78066878222172], 
"eval_len": [40, 27, 20, 24, 23, 25, 19, 27, 29, 27]}

 81%|████████  | 809999/1000000 [11:45:30<2:05:23, 25.25it/s]global step 810000, trans_decision ep_re 122.45168847091085

{"global_step": 810000, "eval_re": [59.602451919987175, 86.08333378817488, 
84.83099986226505, 20.674982903448935, 160.39923756695958, 211.25775547427216, 
82.43762511618021, 165.291170567754, 142.8646987014745, 211.07462880859194], 
"eval_len": [61, 80, 49, 24, 96, 102, 62, 98, 81, 94]}

 82%|████████▏ | 819997/1000000 [11:54:05<1:57:45, 25.48it/s]global step 820000, trans_decision ep_re 69.70493777104828

{"global_step": 820000, "eval_re": [12.225972584720626, 30.34884499466567, 
147.13716637738113, 14.647686293920117, 23.082658590223406, 26.745115742386254, 
15.507347855923978, 212.46989628555107, 202.33705109461923, 12.547637891091348],
"eval_len": [26, 25, 94, 19, 28, 31, 21, 110, 105, 14]}

 83%|████████▎ | 829999/1000000 [12:02:52<1:52:17, 25.23it/s]global step 830000, trans_decision ep_re 30.280438649065797

{"global_step": 830000, "eval_re": [17.70497681847004, 16.721700544527465, 
18.918595107451782, 14.656798341369072, 12.992062745988216, 115.24169572331647, 
29.44090150324748, 22.320324604735408, 17.074950906127675, 37.73238019542437], 
"eval_len": [21, 25, 26, 28, 26, 71, 25, 28, 24, 41]}

 84%|████████▍ | 839999/1000000 [12:11:41<1:45:50, 25.20it/s]global step 840000, trans_decision ep_re 23.187094962991583

{"global_step": 840000, "eval_re": [30.828685054788952, 28.28259401625324, 
15.333732071485626, 21.69506946414206, 15.50504622054147, 19.24956283458756, 
13.13740788266177, 19.08336106726879, 20.626784940541956, 48.12870607764441], 
"eval_len": [30, 27, 20, 29, 22, 28, 25, 20, 30, 42]}

 85%|████████▍ | 849997/1000000 [12:20:40<1:37:55, 25.53it/s]global step 850000, trans_decision ep_re 24.762537221553117

{"global_step": 850000, "eval_re": [14.077779767709984, 19.01675731352341, 
94.54270030208995, 19.466765733336384, 20.157625345818914, 18.923327065697052, 
11.791729734776869, 12.445340343172251, 27.101703126322118, 10.10164348308424], 
"eval_len": [22, 22, 63, 26, 25, 25, 16, 24, 25, 12]}

 86%|████████▌ | 859998/1000000 [12:29:20<1:30:52, 25.67it/s]global step 860000, trans_decision ep_re 27.247665484333925

{"global_step": 860000, "eval_re": [20.747795491176824, 20.020074965577983, 
12.039988606669816, 15.583617890326122, 76.98474758934684, 54.24493175065205, 
23.737583874146512, 11.798029434406857, 22.965390512234325, 14.3544947288019], 
"eval_len": [20, 27, 14, 22, 82, 51, 29, 22, 22, 19]}

 87%|████████▋ | 869999/1000000 [12:37:51<1:25:55, 25.22it/s]global step 870000, trans_decision ep_re 45.275620279801856

{"global_step": 870000, "eval_re": [25.2892749963662, 246.02274334644673, 
40.91550703249603, 28.14181262471839, 21.533818991364743, 16.038263209106944, 
40.387481402454156, 10.603364376693953, 13.067517473951401, 10.75641934442005], 
"eval_len": [28, 118, 36, 35, 26, 23, 42, 16, 18, 13]}

 88%|████████▊ | 879999/1000000 [12:46:32<1:18:33, 25.46it/s]global step 880000, trans_decision ep_re 40.45095089909464

{"global_step": 880000, "eval_re": [32.5345689509379, 19.532063233343376, 
23.056617238843284, 22.71073942728607, 80.5865691943484, 26.029335008046132, 
15.609675286477714, 13.470562724387285, 14.404844244414035, 156.57453368286224],
"eval_len": [33, 23, 29, 25, 63, 27, 19, 26, 16, 85]}

 89%|████████▉ | 889998/1000000 [12:55:13<1:11:30, 25.64it/s]global step 890000, trans_decision ep_re 27.170745086449323

{"global_step": 890000, "eval_re": [25.02368589487909, 38.15970036273149, 
26.239264857976767, 22.209104950825875, 23.25558542599414, 20.408641648030695, 
14.943771785964932, 26.344746121464738, 55.09686118305219, 20.026088633573348], 
"eval_len": [29, 44, 40, 27, 27, 28, 22, 29, 46, 27]}

 90%|████████▉ | 899997/1000000 [13:03:55<1:05:54, 25.29it/s]global step 900000, trans_decision ep_re 29.840596666459124

{"global_step": 900000, "eval_re": [31.015500172869785, 21.105188676241685, 
24.196820032846762, 15.25342849334097, 17.126639023058697, 31.752107450717318, 
11.459810063312032, 20.275029334638916, 111.9089888811737, 14.312454536391359], 
"eval_len": [30, 28, 26, 21, 26, 37, 14, 20, 76, 21]}

 91%|█████████ | 909997/1000000 [13:12:50<59:29, 25.21it/s]global step 910000, trans_decision ep_re 34.88713562623739

{"global_step": 910000, "eval_re": [19.498130470346098, 28.13802738710104, 
11.212524682199058, 34.7840693743777, 19.891771534993477, 13.861242543712622, 
164.67440757055473, 21.554863296114505, 13.0648021579762, 22.191517244998387], 
"eval_len": [26, 26, 16, 39, 20, 21, 87, 32, 29, 29]}

 92%|█████████▏| 919999/1000000 [13:21:30<52:51, 25.22it/s]global step 920000, trans_decision ep_re 53.08393944616061

{"global_step": 920000, "eval_re": [21.929107485544975, 42.10933488448471, 
40.82043734963321, 26.752937348892658, 26.432096657613403, 109.57820648964895, 
56.659508357320995, 23.227570425302346, 40.39439467354295, 142.935800789622], 
"eval_len": [28, 37, 42, 30, 23, 80, 47, 22, 44, 80]}

 93%|█████████▎| 929999/1000000 [13:30:10<46:01, 25.35it/s]global step 930000, trans_decision ep_re 68.26753499976147

{"global_step": 930000, "eval_re": [18.59179629206125, 80.08131696847825, 
11.78712041189618, 321.9846616871984, 40.1734688891866, 127.9347411582165, 
28.802115729883223, 13.828806069357315, 25.749556093399843, 13.741766697937067],
"eval_len": [21, 74, 14, 154, 42, 68, 30, 17, 34, 15]}

 94%|█████████▍| 939999/1000000 [13:38:42<39:47, 25.13it/s]global step 940000, trans_decision ep_re 87.40165884103291

{"global_step": 940000, "eval_re": [115.06180409896851, 22.019896277041767, 
38.888117679300635, 11.044107652827568, 200.27737779584106, 13.489041086678084, 
30.275533511328966, 161.51895171909666, 222.21358173590755, 59.22817685333839], 
"eval_len": [70, 27, 36, 16, 100, 16, 41, 93, 122, 43]}

 95%|█████████▍| 949997/1000000 [13:47:26<33:13, 25.08it/s]global step 950000, trans_decision ep_re 21.30886847429227

{"global_step": 950000, "eval_re": [17.339487340267322, 19.30217715417009, 
11.822558725470518, 21.80421672640161, 17.980321391650435, 14.652899802333838, 
25.797275820249975, 39.81819256486626, 11.042207463780462, 33.5293477537322], 
"eval_len": [31, 22, 15, 25, 22, 19, 27, 41, 14, 38]}

 96%|█████████▌| 959999/1000000 [13:56:20<26:05, 25.56it/s]global step 960000, trans_decision ep_re 53.91266458656429

{"global_step": 960000, "eval_re": [18.195107119622165, 21.153605831562572, 
32.874839543204956, 65.0569894038112, 155.23687993453632, 15.796665924310576, 
41.9491339794205, 15.079428746083286, 29.330971250617555, 144.45302413247376], 
"eval_len": [20, 23, 43, 53, 85, 34, 38, 16, 38, 95]}

 97%|█████████▋| 969997/1000000 [14:05:10<20:26, 24.46it/s]global step 970000, trans_decision ep_re 38.283160032972425

{"global_step": 970000, "eval_re": [24.59123742067073, 27.94861576349694, 
13.881067798528564, 179.42962299305196, 24.88141860954414, 24.84460389432627, 
11.652078991825602, 24.106568149617388, 14.604871458925492, 36.891515249737175],
"eval_len": [36, 40, 19, 80, 34, 31, 22, 25, 22, 40]}

 98%|█████████▊| 979999/1000000 [14:13:52<13:25, 24.84it/s]global step 980000, trans_decision ep_re 41.460542078460115

{"global_step": 980000, "eval_re": [80.607955388027, 14.383621707986237, 
13.641037982436963, 13.733768823384265, 85.81076942569412, 65.7456161374333, 
14.164523640116311, 26.359868916649926, 15.94222862038933, 84.21603014248377], 
"eval_len": [81, 20, 16, 15, 86, 43, 19, 29, 18, 58]}

 99%|█████████▉| 989998/1000000 [14:22:45<06:40, 24.99it/s]global step 990000, trans_decision ep_re 36.729630850384765

{"global_step": 990000, "eval_re": [19.472798897795887, 12.341136860006763, 
17.83447918783407, 126.2328295146303, 13.300025415436405, 39.35109932085996, 
27.08021162352276, 19.868441845866865, 75.30851564895322, 16.50677018894147], 
"eval_len": [24, 20, 21, 88, 18, 41, 29, 26, 48, 24]}

100%|█████████▉| 999998/1000000 [14:31:35<00:00, 25.43it/s]global step 1000000, trans_decision ep_re 21.58691790938976

{"global_step": 1000000, "eval_re": [19.745131114662826, 21.359727990584226, 
29.179723708736354, 14.94897745053378, 17.207338958542667, 28.53373885650357, 
22.176390454428386, 30.627458879911007, 12.360759823701624, 19.72993185629314], 
"eval_len": [27, 23, 31, 23, 22, 28, 29, 30, 21, 21]}

100%|██████████| 1000000/1000000 [14:31:49<00:00, 19.12it/s]
