
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.2
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9996/1000000 [03:23<8:27:50, 32.49it/s]global step 10000, trans_decision ep_re 59.71766567274034

{"global_step": 10000, "eval_re": [13.149057299807062, 12.60509423138211, 
167.55656999434274, 79.75307523913895, 62.34971395335126, 59.131700258423834, 
10.824562073976642, 66.07022941692931, 51.230567606762314, 74.50608665328915], 
"eval_len": [17, 18, 118, 52, 54, 57, 19, 63, 49, 69]}

  2%|▏         | 19999/1000000 [10:07<8:23:38, 32.43it/s]global step 20000, trans_decision ep_re 52.66719655314121

{"global_step": 20000, "eval_re": [13.336462124568193, 84.13507196773084, 
46.83393038072818, 59.639627082050424, 29.284889923795085, 8.576898502184847, 
67.23501581894622, 10.473113838725595, 108.38950324087571, 98.76745265180705], 
"eval_len": [18, 60, 34, 61, 28, 12, 54, 13, 75, 63]}

  3%|▎         | 29999/1000000 [16:52<8:31:15, 31.62it/s]global step 30000, trans_decision ep_re 96.78204791099063

{"global_step": 30000, "eval_re": [15.890064517292904, 80.65575628286736, 
200.68391861617138, 42.6529197657638, 133.27536079577155, 167.7980303539678, 
96.52833962907593, 111.119784261768, 73.17830872335527, 46.03799616387228], 
"eval_len": [16, 55, 148, 35, 99, 116, 94, 68, 74, 46]}

  4%|▍         | 39997/1000000 [23:38<8:16:21, 32.23it/s]global step 40000, trans_decision ep_re 100.87234183387403

{"global_step": 40000, "eval_re": [20.27638787746928, 160.04038222402716, 
12.153896915539462, 64.88676325835752, 172.5819057021922, 71.65016721594873, 
141.50765450392956, 64.71599622881439, 173.0347030257071, 127.87556138675485], 
"eval_len": [24, 123, 14, 57, 94, 53, 84, 39, 127, 81]}

  5%|▍         | 49999/1000000 [30:24<8:18:46, 31.74it/s]global step 50000, trans_decision ep_re 74.98024334186427

{"global_step": 50000, "eval_re": [153.18026250809334, 92.62081457660713, 
64.61449014559436, 114.66781653472144, 72.60445958642585, 59.67131089580361, 
11.927276600842514, 103.28035156397155, 67.16436118481751, 10.071289821765255], 
"eval_len": [90, 58, 64, 63, 60, 58, 16, 59, 56, 13]}

  6%|▌         | 59999/1000000 [37:10<8:05:58, 32.24it/s]global step 60000, trans_decision ep_re 90.27299292491799

{"global_step": 60000, "eval_re": [8.983568571335866, 87.44896207853816, 
17.317335585954417, 271.0737857293303, 90.79343281526361, 105.39637048589921, 
45.253509382298205, 100.6955836344011, 109.08654460236153, 66.68083636379748], 
"eval_len": [14, 71, 18, 161, 76, 90, 44, 83, 82, 55]}

  7%|▋         | 69997/1000000 [43:55<7:58:49, 32.37it/s]global step 70000, trans_decision ep_re 114.62100678364038

{"global_step": 70000, "eval_re": [72.37375881095971, 107.3486766052692, 
170.49848115452303, 52.50470143303752, 199.2502634885867, 79.91094644615858, 
57.442753850444234, 134.47267499065472, 132.42745132856513, 139.98035972820503],
"eval_len": [44, 80, 122, 54, 123, 57, 64, 70, 93, 119]}

  8%|▊         | 79999/1000000 [50:40<7:53:01, 32.42it/s]global step 80000, trans_decision ep_re 95.82803783517224

{"global_step": 80000, "eval_re": [13.144762010910416, 135.2314933328524, 
108.67983784912127, 77.15459896122415, 83.30318758536345, 37.6161721935822, 
38.322327228876354, 105.12980921638585, 277.5589206835246, 82.13926928988154], 
"eval_len": [18, 105, 93, 50, 53, 35, 47, 99, 166, 53]}

  9%|▉         | 89998/1000000 [57:25<7:50:14, 32.25it/s]global step 90000, trans_decision ep_re 79.8607409646901

{"global_step": 90000, "eval_re": [136.1282301488254, 121.89025518020699, 
102.39440254180036, 10.603137128631099, 124.51604819852481, 11.3333160803306, 
41.01244467088744, 143.0079702145006, 39.01734468091356, 68.70426080228005], 
"eval_len": [111, 87, 97, 14, 74, 14, 48, 95, 48, 40]}

 10%|▉         | 99999/1000000 [1:04:08<7:56:53, 31.45it/s]global step 100000, trans_decision ep_re 120.74738021161502

{"global_step": 100000, "eval_re": [279.83381468194074, 189.51020155451928, 
163.7206836565447, 12.300804746230433, 88.04988162249715, 13.11453991330648, 
134.59863807277165, 138.30454082150905, 176.785210801098, 11.255486245732682], 
"eval_len": [148, 131, 85, 14, 49, 15, 80, 75, 109, 18]}

 11%|█         | 109999/1000000 [1:10:54<7:46:02, 31.83it/s]global step 110000, trans_decision ep_re 114.6083985055568

{"global_step": 110000, "eval_re": [237.436086300425, 10.85071174008673, 
148.2734546755669, 150.1167352061585, 95.5840834173056, 123.231043538356, 
71.13179065623987, 16.521459851588876, 140.26386514353405, 152.67475452630663], 
"eval_len": [139, 14, 77, 108, 74, 87, 64, 17, 111, 101]}

 12%|█▏        | 119997/1000000 [1:17:43<7:47:46, 31.35it/s]global step 120000, trans_decision ep_re 90.40147156815875

{"global_step": 120000, "eval_re": [99.76793667907424, 106.6191495100753, 
9.39520475775887, 88.1044359346563, 118.5111503508049, 92.6961161290242, 
109.33998507918758, 87.4787784033812, 63.15887196349959, 128.94308687412533], 
"eval_len": [71, 75, 13, 81, 115, 76, 94, 55, 39, 85]}

 13%|█▎        | 129999/1000000 [1:24:32<7:35:15, 31.85it/s]global step 130000, trans_decision ep_re 68.4280485038464

{"global_step": 130000, "eval_re": [114.33333751589188, 104.2409289709546, 
106.19533821756545, 12.861583153728038, 57.761943778927915, 99.67057245051679, 
33.560080022173274, 133.1616379053692, 10.314773789033975, 12.18028923430295], 
"eval_len": [74, 85, 74, 16, 48, 65, 33, 103, 16, 14]}

 14%|█▍        | 139997/1000000 [1:31:30<7:30:07, 31.84it/s]global step 140000, trans_decision ep_re 95.56341892612093

{"global_step": 140000, "eval_re": [83.25798679430433, 76.3688679118045, 
89.79904764317948, 94.26777418603885, 91.5568269976434, 177.57713392876065, 
83.36618474071471, 64.58962499775599, 123.13772335965267, 71.71301870135467], 
"eval_len": [69, 57, 95, 62, 63, 130, 71, 58, 88, 48]}

 15%|█▍        | 149997/1000000 [1:38:04<7:14:45, 32.59it/s]global step 150000, trans_decision ep_re 111.49999816796728

{"global_step": 150000, "eval_re": [61.34568491643662, 124.99121271584247, 
89.34287818642743, 147.43545265040683, 181.94593136149098, 65.53471650509408, 
16.042992186366952, 81.96930111589, 85.7001216518916, 260.69169038982596], 
"eval_len": [37, 89, 53, 101, 127, 40, 21, 56, 53, 145]}

 16%|█▌        | 159999/1000000 [1:44:48<7:15:01, 32.18it/s]global step 160000, trans_decision ep_re 73.63145765245953

{"global_step": 160000, "eval_re": [148.80860864794712, 90.30833890281313, 
10.218098983653812, 82.87389467694884, 16.466557132603818, 7.169788586434799, 
146.2545973218924, 10.374470776839447, 152.92180405549178, 70.9184174399702], 
"eval_len": [76, 87, 13, 54, 18, 13, 86, 13, 93, 50]}

 17%|█▋        | 169999/1000000 [1:51:34<7:01:01, 32.86it/s]global step 170000, trans_decision ep_re 88.00160769793948

{"global_step": 170000, "eval_re": [14.367828895420296, 165.02085120266167, 
13.708293266200634, 112.77251635626516, 10.822911949590853, 93.55806820930137, 
122.97839127951113, 11.234838927091022, 151.18513519450855, 184.36724169884403],
"eval_len": [19, 125, 27, 58, 16, 69, 87, 13, 82, 123]}

 18%|█▊        | 179998/1000000 [1:58:15<7:03:10, 32.30it/s]global step 180000, trans_decision ep_re 130.86354927920655

{"global_step": 180000, "eval_re": [101.30790801502883, 100.25070074704226, 
136.8545285006053, 208.47900527397954, 86.59351569622442, 82.79890889019612, 
131.01501777931597, 102.92780057123274, 76.97777738410457, 281.43032993433576], 
"eval_len": [78, 66, 79, 96, 88, 49, 103, 58, 58, 239]}

 19%|█▉        | 189999/1000000 [2:05:03<6:57:24, 32.34it/s]global step 190000, trans_decision ep_re 64.39039139420116

{"global_step": 190000, "eval_re": [9.817240699024879, 17.123827496269527, 
10.875604054975701, 137.1532618173728, 85.8398166067231, 103.31245008326866, 
31.389782672018057, 33.05947485937241, 85.99208775045672, 129.3403679025298], 
"eval_len": [12, 17, 17, 88, 69, 57, 61, 35, 62, 82]}

 20%|█▉        | 199999/1000000 [2:11:46<6:50:58, 32.44it/s]global step 200000, trans_decision ep_re 62.37216779178218

{"global_step": 200000, "eval_re": [128.06503710111406, 17.200491191654585, 
10.835966159088363, 15.847814555498097, 10.669429729775286, 7.750995983931366, 
12.093436524292676, 163.47731249597012, 95.62043756122146, 162.16075661527574], 
"eval_len": [77, 17, 14, 17, 17, 12, 14, 129, 74, 86]}

 21%|██        | 209999/1000000 [2:18:29<6:48:08, 32.26it/s]global step 210000, trans_decision ep_re 76.55968624225774

{"global_step": 210000, "eval_re": [171.47393472607175, 72.1043102390232, 
19.040582638957982, 13.744427041657595, 102.40124084262003, 115.82653087127004, 
34.68595362076798, 67.32572093508074, 101.8311056605013, 67.16305584662676], 
"eval_len": [113, 48, 20, 17, 85, 66, 63, 49, 67, 65]}

 22%|██▏       | 219999/1000000 [2:25:12<6:38:29, 32.62it/s]global step 220000, trans_decision ep_re 80.64235714556588

{"global_step": 220000, "eval_re": [133.67363533426777, 81.67613059146184, 
99.83220187441393, 160.67604103305828, 104.42598518582317, 45.409812412983406, 
13.57730520878377, 16.155506620976052, 11.297915376541267, 139.6990378173493], 
"eval_len": [67, 51, 71, 117, 70, 44, 15, 23, 13, 93]}

 23%|██▎       | 229999/1000000 [2:31:58<6:45:31, 31.65it/s]global step 230000, trans_decision ep_re 36.62324860036369

{"global_step": 230000, "eval_re": [12.916768785685354, 66.89552638024028, 
15.956160610492196, 15.444289631577695, 96.94985902829096, 16.346422748327175, 
12.671517776385611, 9.88698766438791, 108.09045835892974, 11.074495019319928], 
"eval_len": [15, 40, 23, 16, 81, 17, 16, 19, 71, 14]}

 24%|██▍       | 239999/1000000 [2:38:44<6:40:37, 31.62it/s]global step 240000, trans_decision ep_re 52.8097979549094

{"global_step": 240000, "eval_re": [73.31725978711175, 23.887500509127342, 
77.38892203735453, 34.076546259444115, 91.48472140875953, 155.99865050884546, 
18.18049885734972, 15.291715706471644, 28.720472512607582, 9.751691962022376], 
"eval_len": [58, 24, 44, 37, 73, 87, 20, 23, 34, 15]}

 25%|██▍       | 249999/1000000 [2:45:31<6:31:45, 31.91it/s]global step 250000, trans_decision ep_re 92.43310256405178

{"global_step": 250000, "eval_re": [299.9365252838389, 10.657517361868889, 
11.476146088747402, 10.579887840347173, 243.0409226978561, 94.15473411607232, 
160.10803595402044, 66.34333635870406, 13.35080044150043, 14.68311949756221], 
"eval_len": [171, 16, 17, 13, 259, 84, 92, 47, 16, 16]}

 26%|██▌       | 259997/1000000 [2:52:17<6:26:46, 31.89it/s]global step 260000, trans_decision ep_re 102.99565531158903

{"global_step": 260000, "eval_re": [19.21814137062343, 17.29543823310036, 
64.71049766737018, 58.071582540631894, 136.39277729866288, 97.98108927165391, 
136.35822906004597, 154.7465760186489, 206.46782051261545, 138.71440114253733], 
"eval_len": [22, 18, 46, 91, 72, 55, 127, 145, 92, 68]}

 27%|██▋       | 269999/1000000 [2:59:04<6:20:03, 32.01it/s]global step 270000, trans_decision ep_re 63.7345304515394

{"global_step": 270000, "eval_re": [17.25739029745673, 13.548434415197873, 
12.432962690853513, 76.43941747079404, 15.78920039791017, 12.36629949301665, 
235.36817133104623, 63.10540974343716, 106.03124587945307, 85.0067727962286], 
"eval_len": [27, 25, 21, 66, 19, 15, 130, 52, 68, 50]}

 28%|██▊       | 279999/1000000 [3:05:51<6:11:30, 32.30it/s]global step 280000, trans_decision ep_re 108.36170018015662

{"global_step": 280000, "eval_re": [76.06516301111454, 22.99307125677376, 
197.83856657834593, 115.6984672485931, 13.810491671342609, 9.564815370472978, 
118.0434612544379, 74.24818668719942, 228.26845647273993, 227.0863222505461], 
"eval_len": [46, 25, 152, 68, 16, 14, 88, 46, 113, 109]}

 29%|██▉       | 289997/1000000 [3:12:37<6:11:10, 31.88it/s]global step 290000, trans_decision ep_re 97.42955485908683

{"global_step": 290000, "eval_re": [134.33383663239852, 339.86181210846365, 
70.74903069218871, 163.47637273641598, 11.88102397951653, 11.462701338497364, 
142.30535448668195, 12.403168690179628, 70.67787949116206, 17.1443684353639], 
"eval_len": [76, 132, 59, 117, 15, 23, 74, 15, 59, 18]}

 30%|██▉       | 299997/1000000 [3:19:24<6:08:22, 31.67it/s]global step 300000, trans_decision ep_re 96.24565259421989

{"global_step": 300000, "eval_re": [81.64546093203474, 154.12688695736637, 
78.64221287986247, 106.29421086945345, 119.29140601409085, 40.89761170949802, 
180.12719424842348, 58.72553634056347, 41.89793889059963, 100.80806710030654], 
"eval_len": [76, 78, 63, 61, 75, 52, 140, 51, 60, 64]}

 31%|███       | 309999/1000000 [3:26:11<5:58:42, 32.06it/s]global step 310000, trans_decision ep_re 58.555416879037544

{"global_step": 310000, "eval_re": [62.7026518418906, 9.581703246243839, 
70.18217193232785, 35.61393814827771, 14.940266500921428, 65.16337169135399, 
71.39514751834459, 11.26474629738465, 137.80765749501597, 106.90251411861483], 
"eval_len": [54, 14, 98, 52, 18, 42, 50, 17, 91, 64]}

 32%|███▏      | 319999/1000000 [3:32:57<5:57:29, 31.70it/s]global step 320000, trans_decision ep_re 66.94609265035436

{"global_step": 320000, "eval_re": [66.61022093987076, 90.16946632913236, 
261.3557252725009, 35.589260329889235, 83.6124597493972, 11.067944073814722, 
58.81049349100391, 12.06767300598444, 12.024373443390735, 38.153309868559454], 
"eval_len": [58, 62, 144, 47, 58, 19, 61, 15, 15, 48]}

 33%|███▎      | 329999/1000000 [3:39:43<5:50:42, 31.84it/s]global step 330000, trans_decision ep_re 70.53408645036129

{"global_step": 330000, "eval_re": [9.782820644996626, 98.11107423783086, 
113.37516666940293, 76.3814899595581, 11.195467696374918, 12.376789740137344, 
66.96680807787881, 139.12066130186096, 93.09508619784413, 84.93549997772817], 
"eval_len": [13, 54, 60, 57, 18, 13, 41, 110, 52, 57]}

 34%|███▍      | 339999/1000000 [3:46:40<5:46:09, 31.78it/s]global step 340000, trans_decision ep_re 63.917804097281135

{"global_step": 340000, "eval_re": [148.11839541669025, 9.689938921470414, 
10.137931069554527, 61.551449363773024, 81.63742937907139, 166.12761964261355, 
13.097908133982935, 9.969167085821695, 16.042986352963144, 122.80521560687048], 
"eval_len": [90, 15, 14, 72, 47, 113, 16, 23, 17, 92]}

 35%|███▍      | 349999/1000000 [3:53:14<5:33:56, 32.44it/s]global step 350000, trans_decision ep_re 105.48166493871872

{"global_step": 350000, "eval_re": [173.4777996769266, 109.82712848337312, 
157.73074359270242, 11.389931376737614, 9.98989036744803, 104.55288985325892, 
21.68644924024722, 199.39348257873067, 97.20845645860997, 169.55987775915284], 
"eval_len": [110, 79, 84, 20, 16, 57, 19, 88, 65, 92]}

 36%|███▌      | 359999/1000000 [3:59:54<5:25:03, 32.82it/s]global step 360000, trans_decision ep_re 112.66204922569031

{"global_step": 360000, "eval_re": [11.120036514270174, 107.9938191416946, 
111.21519251770164, 89.24461139483012, 109.56808888917615, 140.62954119590188, 
171.373580585427, 125.361057484933, 87.84950552883603, 172.26505900413255], 
"eval_len": [15, 71, 74, 50, 96, 137, 129, 91, 54, 110]}

 37%|███▋      | 369998/1000000 [4:06:34<5:21:19, 32.68it/s]global step 370000, trans_decision ep_re 78.23351304268188

{"global_step": 370000, "eval_re": [15.395647802813123, 271.6639129938165, 
18.213741856744804, 175.69043758282925, 11.326586699111205, 8.67538751614677, 
126.65984821123932, 71.37503736586767, 72.41543823147363, 10.919092166776416], 
"eval_len": [16, 230, 18, 105, 15, 13, 99, 49, 49, 16]}

 38%|███▊      | 379997/1000000 [4:13:19<5:21:22, 32.15it/s]global step 380000, trans_decision ep_re 68.80481666152298

{"global_step": 380000, "eval_re": [53.088488858117316, 60.49448931384397, 
16.008629422476723, 73.83902273335033, 71.2633649313504, 29.434291457920587, 
13.550204940458318, 117.65738197762376, 96.29185953777795, 156.4204334423105], 
"eval_len": [33, 37, 19, 50, 41, 36, 17, 74, 76, 104]}

 39%|███▉      | 389997/1000000 [4:20:05<5:18:42, 31.90it/s]global step 390000, trans_decision ep_re 79.08972231725733

{"global_step": 390000, "eval_re": [129.27628918106953, 80.07422453454731, 
153.5031054222321, 13.706751533898643, 9.558147424381268, 102.80496868239928, 
103.1627417975936, 97.39854703481193, 13.468140762860903, 87.94430679877874], 
"eval_len": [82, 69, 92, 16, 14, 62, 55, 63, 16, 67]}

 40%|███▉      | 399997/1000000 [4:27:00<5:09:52, 32.27it/s]global step 400000, trans_decision ep_re 105.37681751822981

{"global_step": 400000, "eval_re": [147.23976434229908, 10.923500566213633, 
153.5910172509746, 13.988571191453909, 195.73198984364885, 97.67538746153717, 
67.23713043282847, 82.15332576491329, 181.5337148789866, 103.69377344944277], 
"eval_len": [78, 17, 83, 17, 89, 81, 53, 57, 104, 82]}

 41%|████      | 409999/1000000 [4:33:32<5:08:09, 31.91it/s]global step 410000, trans_decision ep_re 85.29648497848959

{"global_step": 410000, "eval_re": [197.1444026378731, 84.02827736201438, 
92.6151861349255, 93.7918312234154, 30.1426709297321, 92.16432662326204, 
129.57496324067165, 7.818562722110316, 114.48984722613122, 11.194781684760022], 
"eval_len": [119, 47, 77, 75, 62, 63, 82, 12, 81, 13]}

 42%|████▏     | 419999/1000000 [4:40:15<5:01:28, 32.06it/s]global step 420000, trans_decision ep_re 75.24578495414973

{"global_step": 420000, "eval_re": [160.8690706248612, 128.26819967354112, 
16.35582424432209, 98.93842534965839, 70.11764498816892, 79.28503790635922, 
18.77613585186303, 7.651490934029107, 161.07550842530165, 11.120511543392533], 
"eval_len": [93, 93, 40, 77, 87, 57, 18, 10, 86, 17]}

 43%|████▎     | 429999/1000000 [4:46:58<4:55:08, 32.19it/s]global step 430000, trans_decision ep_re 67.16281617331325

{"global_step": 430000, "eval_re": [30.366690233000686, 69.01293323525721, 
14.865525004548664, 114.94242665875004, 17.53836746180721, 64.65583278547142, 
64.00169108898109, 116.55638215086648, 79.30146463427226, 100.38684848017743], 
"eval_len": [41, 53, 17, 68, 37, 51, 45, 73, 51, 64]}

 44%|████▍     | 439999/1000000 [4:53:41<4:52:03, 31.96it/s]global step 440000, trans_decision ep_re 82.00123865112661

{"global_step": 440000, "eval_re": [144.07703309585227, 55.457900802335075, 
13.170483860793901, 23.467295010638477, 129.44884880615183, 135.0013175081326, 
191.09843963965756, 7.878952551139764, 109.91717233692454, 10.494942899640277], 
"eval_len": [71, 88, 14, 21, 105, 71, 120, 18, 70, 14]}

 45%|████▍     | 449999/1000000 [5:00:28<4:48:41, 31.75it/s]global step 450000, trans_decision ep_re 85.5674902573048

{"global_step": 450000, "eval_re": [155.86063056570651, 156.21346513167973, 
10.754322692446177, 111.94681141978916, 204.15340154040007, 27.16270368897394, 
23.764513371986695, 14.762484125443885, 135.76506226526757, 15.29150777135421], 
"eval_len": [121, 87, 13, 91, 158, 32, 22, 27, 103, 17]}

 46%|████▌     | 459997/1000000 [5:07:14<4:37:59, 32.38it/s]global step 460000, trans_decision ep_re 72.31758012728535

{"global_step": 460000, "eval_re": [148.84181174805693, 95.29969683563927, 
31.8376351363782, 72.57049107343151, 218.04684369426096, 15.656802902634569, 
10.655383277727235, 96.30960640056657, 20.623539306822728, 13.333990897335532], 
"eval_len": [87, 66, 49, 59, 104, 16, 14, 52, 34, 16]}

 47%|████▋     | 469997/1000000 [5:13:57<4:33:31, 32.29it/s]global step 470000, trans_decision ep_re 89.57487149337994

{"global_step": 470000, "eval_re": [12.327740773412776, 92.6518585006774, 
99.47411881978844, 11.888763703161654, 7.4950530881840285, 160.33448587563075, 
317.6880599149288, 38.528425704055934, 11.771183650090249, 143.58902490386942], 
"eval_len": [17, 59, 54, 14, 10, 95, 137, 43, 17, 84]}

 48%|████▊     | 479997/1000000 [5:20:42<4:30:10, 32.08it/s]global step 480000, trans_decision ep_re 91.51586960123413

{"global_step": 480000, "eval_re": [108.81722254691968, 26.987592958558842, 
10.22211766168575, 75.23051598570152, 112.79450824452445, 162.08682496810755, 
13.445690650196678, 79.8166779095433, 181.32848882532613, 144.42905626177753], 
"eval_len": [58, 53, 15, 53, 67, 79, 16, 55, 105, 91]}

 49%|████▉     | 489997/1000000 [5:27:25<4:24:51, 32.09it/s]global step 490000, trans_decision ep_re 55.608363448623905

{"global_step": 490000, "eval_re": [30.674210253857296, 17.398320961565936, 
103.54167922967609, 121.1559312206268, 139.5136033004266, 12.245021839920195, 
56.58730946297038, 34.63671867087493, 16.205635743381894, 24.125203802938966], 
"eval_len": [45, 18, 66, 74, 85, 15, 64, 45, 16, 31]}

 50%|████▉     | 499997/1000000 [5:34:06<4:14:59, 32.68it/s]global step 500000, trans_decision ep_re 87.84953621942654

{"global_step": 500000, "eval_re": [122.4559228482173, 69.84073431293426, 
13.654653447482973, 9.892231254039375, 200.41503224289016, 15.938971786910544, 
8.768226518362837, 91.68143409244706, 258.53555687697155, 87.31259881400943], 
"eval_len": [75, 49, 16, 15, 103, 18, 11, 55, 116, 56]}

 51%|█████     | 509997/1000000 [5:40:46<4:12:10, 32.39it/s]global step 510000, trans_decision ep_re 103.55821751410481

{"global_step": 510000, "eval_re": [11.406140982326736, 142.49700054755704, 
100.94751352888306, 9.682445315816484, 111.03409742279366, 155.98981392870496, 
185.90079430990167, 153.68253236444647, 13.37234723049679, 151.0694895101213], 
"eval_len": [21, 117, 57, 17, 74, 79, 116, 118, 19, 90]}

 52%|█████▏    | 519999/1000000 [5:47:32<4:09:38, 32.05it/s]global step 520000, trans_decision ep_re 89.7024645279509

{"global_step": 520000, "eval_re": [124.10513204932553, 15.098695604860227, 
267.2251524599265, 101.54443295101626, 95.81534010020737, 13.104454296438512, 
147.45154723243138, 104.96446449465907, 15.954749399836388, 11.76067669080764], 
"eval_len": [74, 18, 125, 64, 62, 16, 103, 79, 16, 13]}

 53%|█████▎    | 529999/1000000 [5:54:15<4:06:49, 31.74it/s]global step 530000, trans_decision ep_re 111.10460955841265

{"global_step": 530000, "eval_re": [142.88263924771192, 175.8281832458438, 
92.8026605124924, 134.83574687569472, 135.2946280034754, 72.64977537986708, 
12.145975407634468, 124.30586049913751, 77.88047890100887, 142.4201475112602], 
"eval_len": [116, 106, 71, 80, 107, 42, 14, 78, 44, 113]}

 54%|█████▍    | 539997/1000000 [6:01:02<4:00:22, 31.89it/s]global step 540000, trans_decision ep_re 87.92302161881652

{"global_step": 540000, "eval_re": [362.4427687587574, 16.820387786661808, 
93.13715019145214, 129.27587373178963, 115.83918022347386, 120.0437256225873, 
7.727761458811324, 9.873330695772285, 12.093396848745641, 11.976640870113743], 
"eval_len": [156, 18, 60, 81, 69, 77, 10, 14, 19, 14]}

 55%|█████▍    | 549997/1000000 [6:07:45<3:48:55, 32.76it/s]global step 550000, trans_decision ep_re 62.189127311315715

{"global_step": 550000, "eval_re": [13.809592893993345, 68.0135936102038, 
8.434205833250294, 127.35336890364212, 15.42696594139684, 194.04610916813172, 
66.40671375746307, 13.2993862481361, 9.292956118550533, 105.80838063838925], 
"eval_len": [17, 41, 11, 106, 20, 161, 49, 15, 11, 80]}

 56%|█████▌    | 559997/1000000 [6:14:27<3:46:22, 32.40it/s]global step 560000, trans_decision ep_re 95.97745075841667

{"global_step": 560000, "eval_re": [153.32784535119356, 116.50550157892832, 
82.44656661836098, 63.26332419256538, 9.440715164397558, 66.31817265749734, 
104.67538824225734, 178.23533349651765, 12.618366674392357, 172.9432936080561], 
"eval_len": [109, 79, 49, 46, 12, 52, 59, 96, 20, 92]}

 57%|█████▋    | 569997/1000000 [6:21:11<3:45:10, 31.83it/s]global step 570000, trans_decision ep_re 106.43142604015006

{"global_step": 570000, "eval_re": [12.294300818533266, 130.10315421535017, 
133.24922727845103, 141.050380444659, 211.70178874369628, 124.62501822293218, 
16.557657662967333, 137.32111667316298, 143.36518720109814, 14.046429140650238],
"eval_len": [14, 93, 109, 97, 165, 102, 21, 104, 109, 16]}

 58%|█████▊    | 579999/1000000 [6:27:54<3:38:17, 32.07it/s]global step 580000, trans_decision ep_re 124.00769767826239

{"global_step": 580000, "eval_re": [12.659830650831111, 81.8047053077756, 
109.71453593784277, 147.79932019240619, 39.018582479081545, 100.46946212922929, 
129.82149450733053, 11.3959764414879, 295.79968767313613, 311.5933814635027], 
"eval_len": [14, 56, 57, 107, 42, 63, 74, 21, 140, 168]}

 59%|█████▉    | 589997/1000000 [6:34:50<3:33:56, 31.94it/s]global step 590000, trans_decision ep_re 111.72411302907526

{"global_step": 590000, "eval_re": [148.79323429162227, 9.70318462875245, 
15.80580104212261, 256.65462837503287, 150.68954046080682, 11.369555884552078, 
244.8661739124872, 177.19002992479923, 85.66994739471022, 16.499034375866774], 
"eval_len": [91, 20, 22, 197, 94, 19, 125, 97, 116, 19]}

 60%|█████▉    | 599997/1000000 [6:41:27<3:29:48, 31.77it/s]global step 600000, trans_decision ep_re 101.88622150052574

{"global_step": 600000, "eval_re": [71.26874642982884, 158.46275821392118, 
99.98005034958105, 8.071555661836191, 182.4127987303189, 10.681844779027504, 
68.89929697659245, 192.19518726102652, 102.7766728075349, 124.11330379558979], 
"eval_len": [44, 99, 62, 12, 99, 16, 45, 105, 55, 77]}

 61%|██████    | 609997/1000000 [6:48:13<3:21:31, 32.25it/s]global step 610000, trans_decision ep_re 156.5025293281129

{"global_step": 610000, "eval_re": [113.44880526740805, 157.16860777903975, 
98.00516444580565, 119.98020021225335, 115.69682726326594, 315.52238867458806, 
164.32151606464342, 301.4006854777314, 8.53898745833747, 170.94211063805588], 
"eval_len": [71, 119, 67, 73, 76, 164, 92, 135, 13, 94]}

 62%|██████▏   | 619999/1000000 [6:54:59<3:16:49, 32.18it/s]global step 620000, trans_decision ep_re 48.81257726173728

{"global_step": 620000, "eval_re": [14.324479568207718, 13.843374212038471, 
73.492386312291, 13.797440689977737, 11.610773143194603, 125.19158422613815, 
133.78139343012307, 78.63278237766026, 5.316113935894704, 18.135444721847104], 
"eval_len": [18, 17, 49, 15, 14, 83, 72, 50, 9, 20]}

 63%|██████▎   | 629999/1000000 [7:01:42<3:11:03, 32.28it/s]global step 630000, trans_decision ep_re 97.64879124812089

{"global_step": 630000, "eval_re": [117.31685889866796, 16.95277869382436, 
138.43274796382892, 120.7555044020688, 10.182963948065213, 98.58381846613973, 
99.83938881720044, 208.21566751043045, 9.797950923083576, 156.41023285789947], 
"eval_len": [91, 17, 103, 104, 19, 67, 71, 147, 13, 130]}

 64%|██████▍   | 639998/1000000 [7:08:27<3:05:11, 32.40it/s]global step 640000, trans_decision ep_re 90.81944111955455

{"global_step": 640000, "eval_re": [177.32445905043937, 171.80137419241575, 
16.48473164936516, 30.83859257879421, 13.630580903773277, 199.75402304108738, 
27.47020659961778, 181.97811901114358, 21.750520809240005, 67.161803359669], 
"eval_len": [104, 122, 17, 60, 15, 120, 27, 122, 23, 41]}

 65%|██████▍   | 649997/1000000 [7:15:10<3:00:41, 32.28it/s]global step 650000, trans_decision ep_re 113.26288716565584

{"global_step": 650000, "eval_re": [106.65794344739517, 251.80884813803226, 
97.38239493133212, 15.347672467433513, 175.72990305710138, 150.24013885789844, 
122.72150758503493, 11.818187996943452, 109.36585636086075, 91.55641881452652], 
"eval_len": [80, 125, 62, 17, 116, 122, 81, 19, 71, 66]}

 66%|██████▌   | 659999/1000000 [7:21:51<2:53:10, 32.72it/s]global step 660000, trans_decision ep_re 123.02381106504149

{"global_step": 660000, "eval_re": [197.68217250022883, 81.98936582523928, 
15.073854519279427, 178.91624539086445, 90.73127474793637, 146.00668545906316, 
145.7121016868095, 8.330760808120651, 16.54693022310751, 349.24871948976573], 
"eval_len": [99, 60, 19, 124, 62, 86, 119, 12, 19, 149]}

 67%|██████▋   | 669998/1000000 [7:28:30<2:48:16, 32.68it/s]global step 670000, trans_decision ep_re 86.94492391509286

{"global_step": 670000, "eval_re": [34.48637597477122, 131.57456585597248, 
160.80595665317907, 8.677425484786808, 93.77274058586431, 12.19272446379077, 
330.4675100426159, 67.0571931327324, 16.682371175827548, 13.73237578138804], 
"eval_len": [61, 102, 102, 12, 54, 18, 151, 45, 17, 20]}

 68%|██████▊   | 679998/1000000 [7:35:20<2:45:04, 32.31it/s]global step 680000, trans_decision ep_re 152.77369433507945

{"global_step": 680000, "eval_re": [354.95528115893353, 185.68612719048846, 
96.53164208681456, 412.8065707028678, 14.858124204450164, 8.609451549843536, 
221.88825690513346, 19.407351190123595, 84.64819367870415, 128.34594468343516], 
"eval_len": [180, 87, 63, 166, 16, 11, 133, 23, 64, 65]}

 69%|██████▉   | 689997/1000000 [7:41:50<2:40:41, 32.15it/s]global step 690000, trans_decision ep_re 63.52572311876044

{"global_step": 690000, "eval_re": [127.79421684063242, 14.479722931375623, 
12.976326586011034, 8.722189459738141, 134.93119704798877, 120.95181691004848, 
62.31519155560894, 19.044637258459712, 119.58521464568392, 14.456717952057279], 
"eval_len": [100, 15, 16, 16, 77, 64, 40, 17, 91, 15]}

 70%|██████▉   | 699997/1000000 [7:48:29<2:32:14, 32.84it/s]global step 700000, trans_decision ep_re 47.06661371711827

{"global_step": 700000, "eval_re": [71.43945846509101, 17.651476994005723, 
9.342697641452808, 107.99420603142369, 12.630032180650138, 11.752349887459747, 
94.34842156377832, 11.311680287997472, 125.75106620256534, 8.444747916758477], 
"eval_len": [45, 18, 12, 68, 17, 17, 59, 13, 104, 11]}

 71%|███████   | 709997/1000000 [7:55:08<2:27:11, 32.84it/s]global step 710000, trans_decision ep_re 100.30622552637564

{"global_step": 710000, "eval_re": [121.30864918058779, 78.68387871536628, 
179.50255306165124, 110.39279145386323, 151.51939985333212, 119.88060583160924, 
102.04505910680525, 11.279194399873422, 11.033502994354459, 117.41662066631345],
"eval_len": [79, 58, 123, 72, 93, 75, 72, 13, 17, 62]}

 72%|███████▏  | 719996/1000000 [8:01:48<2:21:35, 32.96it/s]global step 720000, trans_decision ep_re 78.26661822949241

{"global_step": 720000, "eval_re": [10.736429549077831, 36.72799997539814, 
118.13002887907568, 149.5981082426624, 101.74448856794687, 66.93270411249011, 
105.57173589801235, 10.536869571541672, 87.09146863854882, 95.59634886017014], 
"eval_len": [17, 35, 91, 108, 83, 41, 83, 12, 55, 59]}

 73%|███████▎  | 729999/1000000 [8:08:27<2:17:06, 32.82it/s]global step 730000, trans_decision ep_re 98.11948376868438

{"global_step": 730000, "eval_re": [16.082444597631014, 95.48940333307307, 
124.75331962225461, 159.32043640100008, 130.6745711321122, 7.661398361810539, 
148.91934226955522, 87.08734482872292, 88.30031831025417, 122.90625883042999], 
"eval_len": [17, 69, 84, 92, 76, 16, 105, 64, 60, 78]}

 74%|███████▍  | 739998/1000000 [8:15:07<2:14:20, 32.26it/s]global step 740000, trans_decision ep_re 84.15525506221013

{"global_step": 740000, "eval_re": [145.4873202551444, 11.841394146475952, 
75.52091425964818, 17.20863780741539, 90.19040520767945, 108.56053684514924, 
7.131989439339518, 172.94427371201704, 106.35775886940209, 106.30932007983], 
"eval_len": [105, 14, 55, 19, 70, 75, 14, 94, 84, 79]}

 75%|███████▍  | 749998/1000000 [8:21:47<2:08:30, 32.42it/s]global step 750000, trans_decision ep_re 44.36787227869486

{"global_step": 750000, "eval_re": [13.031616729161222, 10.762379626918317, 
15.517350823923335, 17.81822047610931, 18.739658127248518, 129.33785295564454, 
17.512015936989922, 128.665732739249, 11.181292019255382, 81.11260335244904], 
"eval_len": [14, 12, 16, 17, 19, 96, 19, 86, 16, 55]}

 76%|███████▌  | 759998/1000000 [8:28:25<2:02:52, 32.55it/s]global step 760000, trans_decision ep_re 50.3370810734467

{"global_step": 760000, "eval_re": [10.913503579941015, 16.368635790865515, 
82.29666266616157, 20.12475620884666, 107.73551957829933, 8.6391790450442, 
16.209895436379842, 88.5391895752782, 140.96740837547142, 11.576060478179219], 
"eval_len": [17, 17, 63, 20, 66, 13, 20, 53, 95, 14]}

 77%|███████▋  | 769998/1000000 [8:35:04<1:58:01, 32.48it/s]global step 770000, trans_decision ep_re 72.07007383176247

{"global_step": 770000, "eval_re": [53.14437388074669, 11.618144814721965, 
103.29508240006064, 118.56179693047721, 15.129888858052881, 95.56592226995043, 
201.5720314774535, 94.6190565092302, 18.396829081116874, 8.797612095814381], 
"eval_len": [69, 16, 65, 83, 16, 92, 116, 69, 19, 11]}

 78%|███████▊  | 779998/1000000 [8:41:43<1:51:37, 32.85it/s]global step 780000, trans_decision ep_re 57.11841813313636

{"global_step": 780000, "eval_re": [159.3245034909059, 61.13867173895646, 
11.879565073480407, 88.2342530386698, 9.810877382268748, 14.244797959531631, 
8.782400377043237, 17.630332691251404, 82.73249895079806, 117.40628062845789], 
"eval_len": [109, 41, 15, 61, 17, 40, 16, 19, 59, 70]}

 79%|███████▉  | 789998/1000000 [8:48:23<1:47:18, 32.62it/s]global step 790000, trans_decision ep_re 50.531724057860416

{"global_step": 790000, "eval_re": [98.14367327027986, 9.926446667951375, 
8.58663777568454, 23.711246730204447, 89.35083682543767, 231.0718351098352, 
13.105338713302533, 12.796509085808871, 9.500556379851902, 9.124160020247768], 
"eval_len": [67, 28, 11, 25, 65, 133, 15, 15, 13, 12]}

 80%|███████▉  | 799998/1000000 [8:55:01<1:42:54, 32.39it/s]global step 800000, trans_decision ep_re 82.38529717597532

{"global_step": 800000, "eval_re": [15.86081767869479, 266.10717400930065, 
9.99545789337179, 126.14280229268998, 91.18724079492017, 20.107271067594656, 
194.11035794527453, 23.37228718189598, 60.365226058854276, 16.604336837156456], 
"eval_len": [18, 123, 18, 72, 72, 22, 126, 33, 41, 17]}

 81%|████████  | 809998/1000000 [9:01:41<1:36:12, 32.91it/s]global step 810000, trans_decision ep_re 96.76278641287553

{"global_step": 810000, "eval_re": [95.3373653851056, 137.1040750911905, 
13.794440823650943, 12.46397163712288, 86.98810467647547, 133.73119522403175, 
190.98884977025835, 13.126574005961514, 144.59362038132764, 139.49966713363065],
"eval_len": [82, 89, 15, 18, 61, 76, 92, 15, 97, 72]}

 82%|████████▏ | 819997/1000000 [9:08:21<1:31:49, 32.67it/s]global step 820000, trans_decision ep_re 64.13803921513122

{"global_step": 820000, "eval_re": [100.25910468207954, 19.720634246236067, 
73.521720367557, 14.99971367835049, 13.79708098995372, 66.3432708100875, 
110.43789520836248, 60.22517050095067, 91.86160808378959, 90.21419358394509], 
"eval_len": [86, 19, 53, 17, 19, 59, 79, 43, 73, 61]}

 83%|████████▎ | 829997/1000000 [9:15:10<1:25:59, 32.95it/s]global step 830000, trans_decision ep_re 91.85461335450333

{"global_step": 830000, "eval_re": [10.085288024156318, 80.7215271761809, 
222.18446808171248, 147.4563193420185, 98.1461457809176, 136.19256178031492, 
8.82113989303036, 81.8858135056133, 120.83735071227545, 12.215519248813465], 
"eval_len": [16, 59, 113, 95, 57, 77, 12, 50, 83, 16]}

 84%|████████▍ | 839996/1000000 [9:21:39<1:22:03, 32.50it/s]global step 840000, trans_decision ep_re 74.2689194867928

{"global_step": 840000, "eval_re": [12.396971020037764, 131.6416664063564, 
250.96717097111028, 11.156932056456133, 10.789467997649838, 55.485660845937545, 
93.37036388760889, 61.36329012678828, 16.571498998583905, 98.94617255739891], 
"eval_len": [14, 87, 137, 16, 18, 52, 65, 40, 17, 69]}

 85%|████████▍ | 849999/1000000 [9:28:18<1:15:49, 32.97it/s]global step 850000, trans_decision ep_re 63.31500017521442

{"global_step": 850000, "eval_re": [13.604494434293079, 96.4883253083284, 
9.484265370019045, 205.38927129018927, 78.15667634870114, 142.18267897235236, 
6.661441602936436, 10.605512457271802, 58.421327258084034, 12.156008709968624], 
"eval_len": [17, 63, 15, 132, 51, 74, 10, 14, 53, 16]}

 86%|████████▌ | 859999/1000000 [9:34:57<1:12:08, 32.35it/s]global step 860000, trans_decision ep_re 68.37950102549328

{"global_step": 860000, "eval_re": [119.64538875218526, 9.711575503834634, 
169.1919758953411, 8.852814488716119, 56.41542123916868, 90.799604467129, 
19.95956834513328, 180.9603758907742, 14.786338076214024, 13.47194759643635], 
"eval_len": [68, 12, 112, 13, 47, 60, 19, 122, 17, 16]}

 87%|████████▋ | 869999/1000000 [9:41:36<1:05:56, 32.86it/s]global step 870000, trans_decision ep_re 109.44589593381045

{"global_step": 870000, "eval_re": [13.66264217747132, 14.508373801335539, 
13.773464463919508, 128.09908585583645, 177.46381196773993, 299.65416092326063, 
150.97758242315592, 15.085404110401974, 172.93853908552146, 108.29589452946163],
"eval_len": [17, 16, 17, 81, 105, 153, 94, 17, 84, 66]}

 88%|████████▊ | 879998/1000000 [9:48:15<1:01:31, 32.51it/s]global step 880000, trans_decision ep_re 38.676178247714326

{"global_step": 880000, "eval_re": [20.898090545604127, 8.15652299635098, 
148.37168412919243, 11.40274183320024, 8.693260535317847, 126.80526129356122, 
14.137450133222822, 16.553038152444973, 16.13439490136031, 15.609337956888252], 
"eval_len": [25, 12, 103, 18, 17, 65, 16, 17, 22, 24]}

 89%|████████▉ | 889998/1000000 [9:54:54<56:11, 32.63it/s]global step 890000, trans_decision ep_re 68.08755773916026

{"global_step": 890000, "eval_re": [15.573263288898099, 98.22982617930509, 
88.71429658302382, 29.93792995277092, 195.73621567464934, 78.63642372554375, 
87.79185670979554, 10.78755604493848, 11.316694690258995, 64.1515145424187], 
"eval_len": [17, 65, 54, 29, 114, 45, 63, 14, 14, 51]}

 90%|████████▉ | 899997/1000000 [10:01:33<50:51, 32.78it/s]global step 900000, trans_decision ep_re 73.45194280044252

{"global_step": 900000, "eval_re": [5.646144965593613, 9.463615138280744, 
46.57434843649807, 81.14120359442882, 9.185157164200374, 153.56400469496478, 
10.942301813961661, 206.34554616654813, 199.6715356513169, 11.985570378632028], 
"eval_len": [8, 14, 39, 61, 12, 78, 17, 109, 148, 23]}

 91%|█████████ | 909997/1000000 [10:08:12<46:20, 32.37it/s]global step 910000, trans_decision ep_re 58.54593735534327

{"global_step": 910000, "eval_re": [9.374240081623283, 14.717470617018236, 
179.7591194463711, 49.86751777302689, 7.397416107242356, 8.652561255910106, 
101.37350512292704, 15.022967285691854, 94.2541508089947, 105.04042505462712], 
"eval_len": [11, 17, 111, 35, 10, 11, 78, 25, 61, 77]}

 92%|█████████▏| 919997/1000000 [10:14:51<40:35, 32.85it/s]global step 920000, trans_decision ep_re 129.87895383798855

{"global_step": 920000, "eval_re": [55.58486131522309, 211.42320597873294, 
430.58260295594096, 83.36700795069669, 31.881752551863613, 162.35711131609034, 
26.952308441527933, 111.92731960140782, 117.20578084760139, 67.50758742080075], 
"eval_len": [45, 136, 185, 56, 39, 121, 43, 61, 71, 46]}

 93%|█████████▎| 929999/1000000 [10:21:31<35:45, 32.63it/s]global step 930000, trans_decision ep_re 81.58775011490678

{"global_step": 930000, "eval_re": [11.51169952522658, 14.084491607653474, 
86.95614486810159, 127.87511621846002, 32.62009981261963, 153.4166439278638, 
115.3612389596543, 177.28508362859014, 75.94726317638505, 20.819719424513313], 
"eval_len": [14, 22, 58, 78, 35, 100, 84, 99, 49, 20]}

 94%|█████████▍| 939999/1000000 [10:28:11<30:48, 32.46it/s]global step 940000, trans_decision ep_re 59.76614683333927

{"global_step": 940000, "eval_re": [70.01518457693018, 112.44661686788011, 
187.92659389556513, 13.904125183849079, 83.7444220446535, 10.991028805437965, 
22.696329511462746, 11.50082777227676, 11.315995136487286, 73.12034453884999], 
"eval_len": [50, 77, 106, 18, 64, 19, 23, 16, 13, 54]}

 95%|█████████▍| 949999/1000000 [10:35:00<25:29, 32.69it/s]global step 950000, trans_decision ep_re 91.07087921783459

{"global_step": 950000, "eval_re": [8.294781543578015, 146.5165422161889, 
140.02751275037465, 172.51949942820795, 168.1160122378154, 89.07300417826333, 
24.591973955539785, 137.16499831615477, 11.942155565207646, 12.46231198701564], 
"eval_len": [13, 88, 93, 121, 102, 50, 32, 98, 13, 15]}

 96%|█████████▌| 959999/1000000 [10:41:40<20:30, 32.51it/s]global step 960000, trans_decision ep_re 55.81260637158969

{"global_step": 960000, "eval_re": [97.98933275112631, 72.1646971747614, 
123.87014421255603, 68.14302466565184, 73.19318305721536, 11.939438751513594, 
16.36056834985121, 55.4872680960356, 31.35490578474765, 7.623500872437897], 
"eval_len": [64, 63, 69, 63, 68, 14, 21, 81, 47, 16]}

 97%|█████████▋| 969996/1000000 [10:48:08<15:13, 32.86it/s]global step 970000, trans_decision ep_re 79.87067828128555

{"global_step": 970000, "eval_re": [193.5455090378209, 10.744097985654301, 
84.16663686968957, 102.04107259275021, 129.64712538897982, 13.982670134695372, 
11.809199010408106, 82.69848448838323, 156.51340989835285, 13.558577406121165], 
"eval_len": [90, 16, 58, 62, 75, 18, 16, 62, 82, 16]}

 98%|█████████▊| 979996/1000000 [10:54:48<10:08, 32.86it/s]global step 980000, trans_decision ep_re 63.51520544976388

{"global_step": 980000, "eval_re": [12.333190222250689, 10.841576072907321, 
51.6142645108969, 74.68232444258683, 69.80275899403684, 12.224464692759192, 
134.18909858045433, 49.33816799281758, 136.875637935106, 83.25057105382304], 
"eval_len": [15, 15, 36, 51, 52, 21, 96, 51, 82, 50]}

 99%|█████████▉| 989999/1000000 [11:01:27<05:07, 32.57it/s]global step 990000, trans_decision ep_re 16.71047580273602

{"global_step": 990000, "eval_re": [12.138856547358511, 14.304636222490814, 
27.428670881670698, 10.124649456615977, 10.68920987044611, 13.518375246442117, 
9.595279523565514, 18.815845139610005, 11.347050232513364, 39.142184906647095], 
"eval_len": [19, 25, 23, 18, 13, 17, 12, 22, 16, 38]}

100%|█████████▉| 999999/1000000 [11:08:05<00:00, 32.82it/s]global step 1000000, trans_decision ep_re 65.35266155999598

{"global_step": 1000000, "eval_re": [63.51648977452027, 10.506267327298103, 
125.81301877698641, 16.36495604465092, 103.53857859906422, 101.52513208347838, 
82.48850593010567, 28.933789889531916, 70.20268524677105, 50.63719192755289], 
"eval_len": [44, 16, 75, 19, 73, 89, 73, 31, 73, 61]}

100%|██████████| 1000000/1000000 [11:08:16<00:00, 24.94it/s]
