
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.25
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9997/1000000 [03:20<8:21:33, 32.90it/s]global step 10000, trans_decision ep_re 24.949392185195993

{"global_step": 10000, "eval_re": [12.178152403374163, 21.947975922081856, 
23.84777900995887, 11.61398696787942, 9.652573640206272, 6.834492564600758, 
10.465678418404657, 65.01027627941983, 74.51735860348576, 13.425648042548305], 
"eval_len": [15, 20, 51, 16, 19, 14, 19, 61, 51, 15]}

  2%|▏         | 19997/1000000 [09:57<8:16:29, 32.90it/s]global step 20000, trans_decision ep_re 49.258707039765206

{"global_step": 20000, "eval_re": [107.15440385661113, 27.81586212711401, 
110.21223189760434, 9.326198297171116, 14.116391927152929, 13.884820937191368, 
104.11570620189579, 12.368620903794517, 79.18615980021625, 14.406674448900624], 
"eval_len": [69, 37, 78, 12, 17, 20, 74, 15, 67, 16]}

  3%|▎         | 29997/1000000 [16:35<8:11:36, 32.89it/s]global step 30000, trans_decision ep_re 43.31182340551459

{"global_step": 30000, "eval_re": [14.18415665253622, 16.834457150014032, 
203.26189859894785, 12.949129614247557, 9.99440807296814, 9.126486789083321, 
12.029175580129348, 62.3324463975848, 80.04700549265607, 12.359069706978541], 
"eval_len": [17, 40, 127, 19, 12, 14, 17, 41, 61, 15]}

  4%|▍         | 39997/1000000 [23:12<8:05:32, 32.95it/s]global step 40000, trans_decision ep_re 45.639724465955894

{"global_step": 40000, "eval_re": [20.393784524941083, 113.10304741229388, 
77.70625843800877, 8.074926203077945, 13.867596250212177, 72.44391566964113, 
14.334279835681329, 10.816621215039133, 105.38055245430024, 20.276262656363183],
"eval_len": [20, 80, 48, 12, 17, 61, 31, 13, 73, 26]}

  5%|▍         | 49997/1000000 [30:00<8:00:57, 32.92it/s]global step 50000, trans_decision ep_re 81.55889892544504

{"global_step": 50000, "eval_re": [98.87809771391376, 11.829278630752873, 
9.063808497561014, 207.3488938411794, 14.528883808588217, 138.2216590057317, 
78.76135222834415, 80.80258048325982, 96.91884628792582, 79.23558875719371], 
"eval_len": [54, 17, 12, 114, 15, 71, 62, 50, 56, 59]}

  6%|▌         | 59996/1000000 [36:27<7:56:08, 32.90it/s]global step 60000, trans_decision ep_re 43.25077366011108

{"global_step": 60000, "eval_re": [54.474493853405015, 25.40032152256566, 
18.848494076677614, 73.40050510986384, 92.47048385065399, 7.553400045109574, 
12.483517984570385, 10.95786063809493, 124.21328159024726, 12.705377929922598], 
"eval_len": [42, 26, 21, 74, 73, 11, 20, 13, 73, 14]}

  7%|▋         | 69996/1000000 [43:04<7:50:55, 32.91it/s]global step 70000, trans_decision ep_re 48.0680130599039

{"global_step": 70000, "eval_re": [13.56955348252614, 35.9526130580488, 
114.90807110380032, 60.3298656861117, 12.875403326953025, 82.84118562528285, 
15.060712577488838, 111.16654544519491, 21.110900551107083, 12.865279742525313],
"eval_len": [16, 28, 73, 60, 16, 78, 17, 105, 21, 23]}

  8%|▊         | 79996/1000000 [49:42<7:45:18, 32.95it/s]global step 80000, trans_decision ep_re 43.346626247001055

{"global_step": 80000, "eval_re": [21.58384125585312, 59.77270231684053, 
24.49545530437699, 58.07206788041147, 67.83347259373166, 15.401702215995813, 
95.05953131172264, 67.44494444698323, 10.973243186165565, 12.829301957929628], 
"eval_len": [27, 42, 25, 53, 47, 17, 74, 59, 14, 14]}

  9%|▉         | 89996/1000000 [56:30<7:41:59, 32.83it/s]global step 90000, trans_decision ep_re 118.03273958035804

{"global_step": 90000, "eval_re": [7.857770595764016, 354.73739094025086, 
311.4786115903418, 7.206604503744581, 68.56677272603723, 66.81290629910302, 
98.09151822192874, 52.64649188501965, 83.78071301769347, 129.14861602369697], 
"eval_len": [15, 200, 175, 13, 55, 51, 77, 58, 59, 107]}

 10%|▉         | 99999/1000000 [1:03:10<7:35:17, 32.95it/s]global step 100000, trans_decision ep_re 44.31535960520837

{"global_step": 100000, "eval_re": [6.9906482906835805, 9.822228452157862, 
10.572714249060798, 15.273232870596107, 42.53802962283286, 33.93001920947756, 
129.87224888719086, 50.36621321692517, 133.80235665845964, 9.985904594699253], 
"eval_len": [10, 13, 13, 20, 50, 35, 81, 31, 91, 12]}

 11%|█         | 109996/1000000 [1:09:37<7:32:25, 32.79it/s]global step 110000, trans_decision ep_re 93.99966162326238

{"global_step": 110000, "eval_re": [96.77954453191654, 57.57623189393135, 
104.77533461761749, 179.9494565585733, 8.968370765486549, 12.398239395093626, 
194.3674273153027, 75.38218892816518, 131.32483434526617, 78.474987881271], 
"eval_len": [60, 71, 65, 115, 14, 15, 108, 43, 82, 67]}

 12%|█▏        | 119999/1000000 [1:16:16<7:26:46, 32.83it/s]global step 120000, trans_decision ep_re 55.927293075615445

{"global_step": 120000, "eval_re": [42.81406622803029, 6.330169334714386, 
8.731491425256014, 14.441833436591955, 12.759576923706598, 13.57284083254881, 
47.631218759158095, 345.6104538727053, 58.10598571683923, 9.275294226603673], 
"eval_len": [57, 9, 15, 17, 18, 15, 64, 173, 73, 15]}

 13%|█▎        | 129999/1000000 [1:22:55<7:20:21, 32.93it/s]global step 130000, trans_decision ep_re 58.82115533715794

{"global_step": 130000, "eval_re": [89.04228428279754, 71.62342118371451, 
51.57056658214926, 120.64914501170607, 118.66882604599436, 8.605276651953591, 
30.610931382727237, 13.363754085049692, 73.52568109991677, 10.551667045570463], 
"eval_len": [86, 42, 32, 79, 89, 13, 31, 18, 63, 20]}

 14%|█▍        | 139999/1000000 [1:29:34<7:16:15, 32.86it/s]global step 140000, trans_decision ep_re 40.54121869686378

{"global_step": 140000, "eval_re": [12.013823288577838, 34.29093120068346, 
9.640130037998167, 16.03984056765806, 105.35242580228517, 8.727251358410282, 
17.46561638247612, 100.06767448283209, 50.65935100337253, 51.155142844344], 
"eval_len": [15, 33, 16, 17, 83, 14, 18, 75, 82, 45]}

 15%|█▍        | 149999/1000000 [1:36:11<7:11:54, 32.80it/s]global step 150000, trans_decision ep_re 56.400917643021046

{"global_step": 150000, "eval_re": [57.22991881205633, 149.26931777923446, 
14.662782974999825, 91.37368137175424, 71.864445471781, 18.42281406249395, 
9.019957251210014, 12.597644173991192, 83.83268713822137, 55.735927394468085], 
"eval_len": [36, 91, 18, 69, 42, 18, 11, 18, 98, 50]}

 16%|█▌        | 159999/1000000 [1:42:51<7:11:28, 32.45it/s]global step 160000, trans_decision ep_re 46.56335910812033

{"global_step": 160000, "eval_re": [11.8328017967714, 18.070805332575855, 
67.17094617560446, 64.26069335204654, 100.31109793483641, 69.88276471493246, 
70.17206225551894, 9.261153377559378, 15.60105392687024, 39.070212214487654], 
"eval_len": [25, 29, 44, 53, 54, 48, 62, 17, 21, 35]}

 17%|█▋        | 169999/1000000 [1:49:29<7:00:31, 32.90it/s]global step 170000, trans_decision ep_re 46.62446167998679

{"global_step": 170000, "eval_re": [71.36558899878621, 54.77634202916135, 
10.55846035300594, 16.556430040712772, 15.337506986303557, 111.85263633403166, 
79.6597599891093, 14.286754273547752, 7.809416572342762, 84.0417212228666], 
"eval_len": [51, 46, 16, 18, 28, 70, 80, 18, 14, 58]}

 18%|█▊        | 179999/1000000 [1:56:06<6:55:45, 32.87it/s]global step 180000, trans_decision ep_re 48.33502064056898

{"global_step": 180000, "eval_re": [74.29341151540092, 15.266495608743995, 
7.4340563293262605, 49.334663238357656, 13.732150025209124, 40.25052484356239, 
89.36036474407592, 8.576937259210222, 171.42791733822617, 13.673685503577124], 
"eval_len": [43, 18, 12, 59, 16, 52, 53, 12, 112, 17]}

 19%|█▉        | 189999/1000000 [2:02:44<6:51:48, 32.78it/s]global step 190000, trans_decision ep_re 50.026181997142764

{"global_step": 190000, "eval_re": [52.144221989098625, 95.48653680951249, 
73.7848581661577, 9.704400462284793, 70.5670443323036, 51.212909106002556, 
58.25176897369575, 9.98731020452022, 20.01027260812466, 59.11249731972732], 
"eval_len": [35, 63, 53, 12, 52, 49, 47, 17, 28, 49]}

 20%|█▉        | 199999/1000000 [2:09:21<6:50:59, 32.44it/s]global step 200000, trans_decision ep_re 67.34961172815409

{"global_step": 200000, "eval_re": [15.873010613399558, 91.43711129874606, 
15.09387078825782, 12.349326562332198, 139.69978038294244, 56.66109587634439, 
118.87607613213459, 91.07046801501232, 39.26969600265611, 93.16568160971532], 
"eval_len": [18, 72, 17, 18, 106, 57, 70, 87, 40, 61]}

 21%|██        | 209999/1000000 [2:16:01<6:40:01, 32.91it/s]global step 210000, trans_decision ep_re 70.09711401725387

{"global_step": 210000, "eval_re": [113.89339803443961, 8.734384581756306, 
113.98035082995803, 24.53362514947251, 13.850617556244446, 136.66661479503338, 
87.97917421279, 81.86294193403131, 108.01516944259461, 11.454863636218489], 
"eval_len": [64, 11, 77, 43, 15, 73, 50, 63, 105, 13]}

 22%|██▏       | 219999/1000000 [2:22:38<6:35:03, 32.91it/s]global step 220000, trans_decision ep_re 24.213349961222917

{"global_step": 220000, "eval_re": [44.294324581172255, 47.55665321886167, 
12.762889294559894, 14.486827646612905, 27.41282747814373, 20.592025350162743, 
11.878023904740036, 8.933081847699395, 43.96018824650143, 10.256658043775122], 
"eval_len": [44, 49, 14, 17, 39, 19, 14, 11, 46, 17]}

 23%|██▎       | 229999/1000000 [2:29:15<6:30:44, 32.84it/s]global step 230000, trans_decision ep_re 43.8690302870675

{"global_step": 230000, "eval_re": [62.133680387453516, 14.950768315025316, 
12.491502624076224, 47.49476572591395, 13.141595515465065, 207.68180530230958, 
17.7141194224147, 9.960864492387683, 14.65239754366468, 38.46880354196421], 
"eval_len": [44, 16, 16, 41, 16, 115, 22, 12, 15, 40]}

 24%|██▍       | 239999/1000000 [2:35:52<6:24:39, 32.93it/s]global step 240000, trans_decision ep_re 47.34420208366877

{"global_step": 240000, "eval_re": [84.22916036464372, 8.502680935916109, 
73.45253776560318, 11.00747101668969, 14.781522506214452, 77.3887151305786, 
10.654696942490062, 27.855099452014652, 59.90992272327838, 105.66021399925879], 
"eval_len": [55, 10, 42, 15, 17, 64, 19, 33, 40, 69]}

 25%|██▍       | 249999/1000000 [2:42:29<6:20:20, 32.87it/s]global step 250000, trans_decision ep_re 35.79577139782644

{"global_step": 250000, "eval_re": [84.26680212288352, 10.01219974751604, 
10.67461659271755, 19.38738075040525, 72.45878184149475, 50.86159165662996, 
46.896833503471846, 8.106838080758841, 19.179762895180936, 36.11290678720573], 
"eval_len": [69, 16, 18, 29, 62, 52, 44, 10, 19, 39]}

 26%|██▌       | 259999/1000000 [2:49:06<6:14:48, 32.91it/s]global step 260000, trans_decision ep_re 40.79005716599971

{"global_step": 260000, "eval_re": [14.601948119623131, 14.533002216673072, 
86.38939742462608, 5.8257003127889675, 12.58399647917991, 28.315879777164994, 
58.24474542216144, 18.49608401912646, 51.9558530288488, 116.95396485980427], 
"eval_len": [16, 17, 66, 22, 22, 38, 49, 18, 36, 105]}

 27%|██▋       | 269999/1000000 [2:55:43<6:09:58, 32.89it/s]global step 270000, trans_decision ep_re 45.5433032623168

{"global_step": 270000, "eval_re": [20.36332050950065, 121.62328249504799, 
39.8159853797913, 62.6605994489427, 20.355181459474334, 11.171861781627134, 
77.82049240255233, 18.770760807346125, 74.95023345135901, 7.901314887526422], 
"eval_len": [24, 64, 39, 58, 35, 14, 73, 35, 56, 16]}

 28%|██▊       | 279999/1000000 [3:02:21<6:04:39, 32.91it/s]global step 280000, trans_decision ep_re 53.87029298137319

{"global_step": 280000, "eval_re": [8.543904895978004, 8.915754305104661, 
85.40206507669572, 14.283168411917972, 21.53013001233912, 158.05876004087582, 
146.02933339213604, 11.351611262557142, 71.10490373877826, 13.483298677349138], 
"eval_len": [11, 14, 67, 24, 36, 92, 74, 13, 46, 18]}

 29%|██▉       | 289999/1000000 [3:08:59<5:59:15, 32.94it/s]global step 290000, trans_decision ep_re 52.07243595536236

{"global_step": 290000, "eval_re": [36.21228713547122, 55.9331233546507, 
109.76298030573948, 23.452675677615268, 21.742569258436564, 19.094747649922407, 
12.721370957920776, 170.43506147979818, 13.593628581156675, 57.7759151529123], 
"eval_len": [30, 46, 61, 24, 35, 29, 16, 93, 15, 45]}

 30%|██▉       | 299999/1000000 [3:15:35<5:53:58, 32.96it/s]global step 300000, trans_decision ep_re 28.96240349465474

{"global_step": 300000, "eval_re": [19.18171680046988, 17.6812433014017, 
16.92312322896459, 10.732023604504832, 17.33581809970083, 23.15168912068482, 
19.41051383029702, 52.12471433321089, 84.69166229315105, 28.39153033416175], 
"eval_len": [19, 21, 26, 17, 23, 22, 20, 43, 47, 39]}

 31%|███       | 309999/1000000 [3:22:14<5:48:04, 33.04it/s]global step 310000, trans_decision ep_re 39.23567513209073

{"global_step": 310000, "eval_re": [13.013543966723217, 79.01770244101816, 
11.583067579973141, 79.57163741919382, 19.61837312957177, 12.52452599154562, 
17.106173315285453, 15.389100875698075, 80.21044463104411, 64.32218197085395], 
"eval_len": [16, 57, 15, 66, 19, 13, 19, 22, 65, 60]}

 32%|███▏      | 319999/1000000 [3:28:51<5:41:14, 33.21it/s]global step 320000, trans_decision ep_re 73.22256371435111

{"global_step": 320000, "eval_re": [157.94934605306847, 12.314670417468694, 
77.12651352779352, 193.28725484893042, 66.69654018139188, 43.34429443310491, 
8.47396561792365, 74.01054840547921, 85.81752438809373, 13.20497927025655], 
"eval_len": [122, 14, 50, 163, 71, 42, 12, 75, 58, 16]}

 33%|███▎      | 329999/1000000 [3:35:28<5:39:36, 32.88it/s]global step 330000, trans_decision ep_re 58.06475325674584

{"global_step": 330000, "eval_re": [237.2271469170606, 8.431022271085485, 
146.46515744646877, 14.424636790680443, 108.19644680354443, 16.403217472682382, 
8.333547833574759, 19.742602917990617, 13.578600514619145, 7.8451535997518365], 
"eval_len": [121, 13, 122, 17, 67, 16, 14, 37, 15, 10]}

 34%|███▍      | 339999/1000000 [3:42:05<5:33:07, 33.02it/s]global step 340000, trans_decision ep_re 91.56514067323351

{"global_step": 340000, "eval_re": [149.43694268830353, 276.60242007972175, 
13.62806446326022, 14.446933046587167, 18.563465261405387, 68.95824725442051, 
112.6066666106642, 105.5674508364472, 10.526447798907949, 145.31476869261715], 
"eval_len": [121, 143, 16, 20, 20, 57, 74, 56, 14, 109]}

 35%|███▍      | 349999/1000000 [3:48:40<5:28:20, 32.99it/s]global step 350000, trans_decision ep_re 75.5703233579273

{"global_step": 350000, "eval_re": [9.859344598372122, 5.81225735986006, 
86.83696355744057, 14.61006258198131, 93.59619115722938, 92.25862796623096, 
217.68870132896183, 65.90112990019111, 85.78853176641796, 83.35142336258761], 
"eval_len": [14, 16, 67, 18, 74, 106, 110, 113, 73, 59]}

 36%|███▌      | 359999/1000000 [3:55:15<5:19:42, 33.36it/s]global step 360000, trans_decision ep_re 70.55056423767846

{"global_step": 360000, "eval_re": [118.09075458477334, 9.074914707519532, 
11.87239626938222, 99.51044300517854, 11.680560770029482, 87.97661168367216, 
87.92333908761489, 17.282018840563623, 195.21686018418742, 66.87774324386343], 
"eval_len": [61, 13, 15, 87, 15, 63, 53, 18, 123, 65]}

 37%|███▋      | 369999/1000000 [4:01:48<5:19:18, 32.88it/s]global step 370000, trans_decision ep_re 34.8328450866969

{"global_step": 370000, "eval_re": [59.953221245400485, 10.250795667373183, 
6.171418504269104, 17.59201839513461, 49.26261379420864, 14.192160619279155, 
39.86442159963658, 16.628293287921387, 121.92620524412249, 12.487302509623321], 
"eval_len": [46, 15, 9, 22, 37, 21, 41, 17, 77, 15]}

 38%|███▊      | 379999/1000000 [4:08:21<5:10:50, 33.24it/s]global step 380000, trans_decision ep_re 57.80257738616431

{"global_step": 380000, "eval_re": [9.066657091236577, 91.62578695216762, 
87.56041073227982, 10.889535722529818, 17.27888627705862, 130.52365089791, 
101.204529185662, 104.81444352687272, 9.600353362693806, 15.461520113232146], 
"eval_len": [12, 56, 84, 20, 20, 80, 76, 70, 19, 24]}

 39%|███▉      | 389999/1000000 [4:14:57<5:07:56, 33.01it/s]global step 390000, trans_decision ep_re 63.11855027475023

{"global_step": 390000, "eval_re": [106.37756134100971, 11.837462535979354, 
77.59847133746202, 12.027558970140618, 11.69828527041007, 92.65194331104223, 
15.077403784379168, 120.81068236046448, 91.77482065324257, 91.33131318337203], 
"eval_len": [76, 20, 61, 14, 17, 84, 32, 102, 60, 73]}

 40%|███▉      | 399999/1000000 [4:21:34<5:04:03, 32.89it/s]global step 400000, trans_decision ep_re 101.26167515725982

{"global_step": 400000, "eval_re": [61.1115430442656, 122.03740815685669, 
185.0117913061871, 103.07545000119761, 128.69606249510136, 13.45727348857956, 
19.050368728783905, 91.92154110494913, 186.571468469752, 101.68384477692517], 
"eval_len": [52, 119, 146, 58, 88, 18, 19, 75, 99, 59]}

 41%|████      | 409998/1000000 [4:28:10<4:58:54, 32.90it/s]global step 410000, trans_decision ep_re 52.56483151233012

{"global_step": 410000, "eval_re": [166.35064863471905, 13.653192236243072, 
14.519536340450339, 98.0878482892073, 17.629746452937713, 78.60417548525338, 
8.808126156603896, 12.543339246982203, 108.80862250326015, 6.64307977764418], 
"eval_len": [106, 14, 20, 67, 20, 50, 15, 14, 128, 14]}

 42%|████▏     | 419998/1000000 [4:34:45<4:52:12, 33.08it/s]global step 420000, trans_decision ep_re 83.16175546613161

{"global_step": 420000, "eval_re": [8.658448307510536, 197.1125446843762, 
204.21104653076227, 97.15217955687505, 16.307731919807384, 9.118044818059682, 
12.518042594213021, 113.617883273585, 102.91074156875818, 70.0108914073688], 
"eval_len": [16, 135, 108, 63, 20, 16, 18, 76, 58, 71]}

 43%|████▎     | 429997/1000000 [4:41:19<4:49:20, 32.83it/s]global step 430000, trans_decision ep_re 63.35237981910895

{"global_step": 430000, "eval_re": [12.677461013727706, 52.29278129142777, 
64.41935979663515, 57.79036623509637, 181.26313473462358, 12.30850162327961, 
14.872007907651055, 122.5733345348469, 24.64310050296043, 90.6837505508408], 
"eval_len": [17, 60, 52, 40, 114, 14, 16, 70, 22, 66]}

 44%|████▍     | 439997/1000000 [4:47:54<4:43:30, 32.92it/s]global step 440000, trans_decision ep_re 46.19242432826355

{"global_step": 440000, "eval_re": [10.241013185044933, 13.891823270330839, 
47.51287140455196, 73.06490375241052, 13.921215593054203, 7.368918831259185, 
77.17085351957195, 6.935878237853459, 177.2075334321302, 34.60923205642818], 
"eval_len": [16, 24, 36, 44, 18, 13, 65, 13, 94, 49]}

 45%|████▍     | 449997/1000000 [4:54:27<4:37:09, 33.07it/s]global step 450000, trans_decision ep_re 89.3596981254439

{"global_step": 450000, "eval_re": [106.00831411760129, 73.4585950973345, 
43.2212793033944, 85.26620245814544, 180.92184385874418, 219.25672115803832, 
11.14632359575946, 11.095657340487879, 120.06751433647901, 43.15452998845456], 
"eval_len": [63, 53, 33, 68, 87, 115, 14, 14, 65, 33]}

 46%|████▌     | 459997/1000000 [5:01:01<4:33:50, 32.87it/s]global step 460000, trans_decision ep_re 54.668873144808785

{"global_step": 460000, "eval_re": [70.96955710988733, 10.568686743639402, 
117.28920847770061, 75.59655388170576, 88.9312171672479, 81.5457496120216, 
7.494812864489434, 6.816319710940853, 9.343354974614336, 78.13327090584059], 
"eval_len": [57, 15, 73, 61, 62, 49, 10, 10, 11, 57]}

 47%|████▋     | 469997/1000000 [5:07:35<4:25:00, 33.33it/s]global step 470000, trans_decision ep_re 53.24641542213476

{"global_step": 470000, "eval_re": [76.90450156610308, 22.000282557402496, 
235.67533914198071, 56.79175938450892, 70.68103530799519, 15.61967720954371, 
15.226378876990212, 7.463367811952736, 13.66990691187496, 18.43190545299558], 
"eval_len": [77, 22, 105, 50, 52, 25, 16, 10, 20, 19]}

 48%|████▊     | 479997/1000000 [5:14:08<4:24:06, 32.81it/s]global step 480000, trans_decision ep_re 38.89158950993021

{"global_step": 480000, "eval_re": [18.060394599491232, 25.475273085148494, 
10.184207902218764, 22.27089384918317, 10.761212275390971, 11.68929977985206, 
84.01058655741303, 146.8306078265074, 44.59157192422082, 15.041847299876142], 
"eval_len": [19, 54, 16, 37, 15, 16, 58, 103, 40, 17]}

 49%|████▉     | 489997/1000000 [5:20:44<4:18:01, 32.94it/s]global step 490000, trans_decision ep_re 69.7038735893062

{"global_step": 490000, "eval_re": [210.55596121106223, 19.415501420763658, 
75.97309444426769, 82.14105661213345, 72.77856375077049, 12.345742888291102, 
19.843015674420187, 182.35440937759356, 8.58306176251249, 13.048328751247162], 
"eval_len": [99, 20, 52, 84, 56, 18, 43, 89, 14, 15]}

 50%|████▉     | 499997/1000000 [5:27:18<4:12:10, 33.05it/s]global step 500000, trans_decision ep_re 36.29956086542567

{"global_step": 500000, "eval_re": [6.306619881634038, 56.5888857638273, 
12.55957531374624, 100.52922628929915, 8.697174062463725, 10.4955337836367, 
9.487494482507802, 10.583341976834218, 138.22893246100696, 9.518824639300508], 
"eval_len": [10, 60, 15, 65, 12, 15, 12, 15, 80, 14]}

 51%|█████     | 509997/1000000 [5:33:53<4:07:54, 32.94it/s]global step 510000, trans_decision ep_re 55.41176696211024

{"global_step": 510000, "eval_re": [14.085717597254067, 106.95729022310121, 
81.63564449085037, 14.786393439173578, 6.917637027608507, 54.581702635500754, 
96.0771478542743, 75.9937412810967, 11.561135771985095, 91.52125930025784], 
"eval_len": [16, 73, 71, 17, 10, 33, 90, 46, 16, 97]}

 52%|█████▏    | 519997/1000000 [5:40:28<4:01:13, 33.16it/s]global step 520000, trans_decision ep_re 44.87102004175418

{"global_step": 520000, "eval_re": [19.13659318735752, 88.6645496033931, 
181.22938077572925, 17.923876964875962, 11.348841229263531, 13.533888284194182, 
72.53298083447788, 14.59499735318469, 16.147005855148997, 13.598086329916637], 
"eval_len": [19, 129, 100, 21, 15, 15, 50, 18, 17, 18]}

 53%|█████▎    | 529997/1000000 [5:47:02<3:57:05, 33.04it/s]global step 530000, trans_decision ep_re 53.40577981561339

{"global_step": 530000, "eval_re": [97.64916242357639, 63.82812085086336, 
124.51954418197846, 24.667693260811774, 19.85550178208721, 15.341665682937194, 
7.614609583947421, 85.15733618017016, 5.918072504055213, 89.50609170570672], 
"eval_len": [57, 41, 83, 27, 19, 16, 14, 83, 11, 67]}

 54%|█████▍    | 539997/1000000 [5:53:35<3:49:24, 33.42it/s]global step 540000, trans_decision ep_re 36.23903071208874

{"global_step": 540000, "eval_re": [10.169610651574128, 139.09929281613472, 
14.020613153456324, 13.536086856203667, 44.26517909952092, 10.956595546870645, 
93.0925871900769, 10.190839750241434, 14.658746618306406, 12.400755438502326], 
"eval_len": [15, 117, 17, 18, 57, 15, 64, 14, 18, 14]}

 55%|█████▍    | 549997/1000000 [6:00:20<3:48:04, 32.88it/s]global step 550000, trans_decision ep_re 152.03065376736734

{"global_step": 550000, "eval_re": [31.814237379011345, 128.4285272541378, 
104.56575916080006, 136.10432822192197, 81.50219856312277, 113.01257339734589, 
129.11517697521398, 322.0790098350947, 350.3004307023502, 123.38429618467465], 
"eval_len": [54, 108, 75, 129, 56, 79, 110, 147, 159, 87]}

 56%|█████▌    | 559999/1000000 [6:06:45<3:40:16, 33.29it/s]global step 560000, trans_decision ep_re 97.77197222723547

{"global_step": 560000, "eval_re": [16.525471425265913, 9.673481303463827, 
15.724745776203518, 6.4849092447282235, 266.0408122318733, 133.7858945390237, 
139.68800070147557, 141.0307598865498, 141.43470976317926, 107.33093740059165], 
"eval_len": [16, 21, 17, 9, 138, 91, 74, 78, 85, 66]}

 57%|█████▋    | 569999/1000000 [6:13:21<3:37:31, 32.95it/s]global step 570000, trans_decision ep_re 39.2610443732069

{"global_step": 570000, "eval_re": [11.22797302286507, 16.807820621175683, 
127.74588640722574, 9.538010440470885, 18.96963808322852, 101.12737071075547, 
12.851836720546444, 16.804054762926857, 66.02311264251293, 11.514740320361323], 
"eval_len": [24, 22, 77, 18, 19, 56, 14, 19, 46, 16]}

 58%|█████▊    | 579999/1000000 [6:19:57<3:30:48, 33.20it/s]global step 580000, trans_decision ep_re 65.3173333243229

{"global_step": 580000, "eval_re": [88.93841963615505, 9.94275335166825, 
9.073943870095897, 116.34280343354546, 12.427809630368857, 80.67304266807069, 
9.029298412639168, 201.3434592223923, 17.387814309643343, 108.01398870865005], 
"eval_len": [62, 11, 12, 87, 20, 53, 16, 116, 28, 91]}

 59%|█████▉    | 589999/1000000 [6:26:31<3:27:14, 32.97it/s]global step 590000, trans_decision ep_re 73.68209898215869

{"global_step": 590000, "eval_re": [53.07473708610681, 76.60214371341398, 
9.674898785229152, 191.87487445672224, 11.955180541006918, 13.088810448526964, 
171.99390983557768, 9.92050418171543, 186.2778393677049, 12.358091405582735], 
"eval_len": [43, 63, 19, 132, 15, 19, 172, 16, 120, 15]}

 60%|█████▉    | 599999/1000000 [6:33:05<3:22:33, 32.91it/s]global step 600000, trans_decision ep_re 27.745742699778525

{"global_step": 600000, "eval_re": [48.88123597417301, 16.259381643124556, 
31.272852674391597, 12.141264889000537, 12.967447695386085, 12.642156633104872, 
12.688608218230085, 42.174871794071485, 74.60101468292565, 13.828592793377393], 
"eval_len": [81, 29, 45, 14, 14, 19, 15, 31, 46, 15]}

 61%|██████    | 609999/1000000 [6:39:40<3:17:32, 32.90it/s]global step 610000, trans_decision ep_re 27.704329955971154

{"global_step": 610000, "eval_re": [34.94619208173921, 41.421382374625566, 
8.956883994030033, 53.42801377747907, 8.545921524557347, 10.29058622095577, 
11.631817929357453, 65.09539141825017, 34.189015686555074, 8.538094552161873], 
"eval_len": [38, 36, 19, 79, 11, 13, 17, 87, 60, 11]}

 62%|██████▏   | 619999/1000000 [6:46:14<3:11:34, 33.06it/s]global step 620000, trans_decision ep_re 35.93322099316088

{"global_step": 620000, "eval_re": [29.32561833455544, 36.692342743534404, 
8.68189755430723, 16.905684769626582, 154.28995701911407, 10.627723763704026, 
26.9247719935882, 16.993201260257337, 10.57430504282165, 48.316707450099884], 
"eval_len": [42, 47, 10, 19, 155, 14, 46, 17, 19, 70]}

 63%|██████▎   | 629999/1000000 [6:52:51<3:07:38, 32.87it/s]global step 630000, trans_decision ep_re 62.33742847341949

{"global_step": 630000, "eval_re": [18.167730569869303, 23.92682011098685, 
15.217025798520364, 93.19645703717418, 16.087787967542294, 12.68217385644281, 
177.8366144774916, 187.3937678856056, 59.844598878738296, 19.021308151823618], 
"eval_len": [26, 29, 19, 55, 18, 20, 91, 122, 72, 22]}

 64%|██████▍   | 639999/1000000 [6:59:28<3:02:35, 32.86it/s]global step 640000, trans_decision ep_re 61.712434672648804

{"global_step": 640000, "eval_re": [50.41043296896505, 11.623759152511532, 
12.07444714289293, 33.351692690745374, 73.30579624261375, 230.89288869164523, 
164.33210652258347, 9.652608747913398, 12.64929857054912, 18.831315996068255], 
"eval_len": [48, 17, 14, 32, 55, 128, 88, 33, 19, 18]}

 65%|██████▍   | 649999/1000000 [7:06:02<2:56:46, 33.00it/s]global step 650000, trans_decision ep_re 68.52600653805257

{"global_step": 650000, "eval_re": [8.562415704111446, 28.36602617316498, 
210.27086330386905, 24.32213113467284, 14.532859392315785, 9.939925606812833, 
67.4558533468652, 82.37408937091813, 10.938484132257464, 228.49741721553795], 
"eval_len": [12, 35, 105, 33, 16, 15, 43, 72, 14, 104]}

 66%|██████▌   | 659999/1000000 [7:12:36<2:52:34, 32.84it/s]global step 660000, trans_decision ep_re 47.71052590435372

{"global_step": 660000, "eval_re": [65.00493862351901, 115.52173780769165, 
112.5389831997007, 18.722723435626158, 8.88633973095589, 10.915261005906078, 
15.313028114958723, 65.03828698960706, 47.93897382485926, 17.224986310712723], 
"eval_len": [62, 88, 66, 18, 12, 13, 21, 43, 67, 17]}

 67%|██████▋   | 669999/1000000 [7:19:09<2:46:41, 32.99it/s]global step 670000, trans_decision ep_re 44.94551599019555

{"global_step": 670000, "eval_re": [9.521667451813943, 17.852970305153654, 
11.697136642777306, 168.99459233360795, 11.938112327906305, 141.5813351425653, 
11.190624017629364, 59.780107187314634, 6.986940805697618, 9.911673687489497], 
"eval_len": [17, 18, 19, 84, 16, 70, 17, 53, 11, 14]}

 68%|██████▊   | 679999/1000000 [7:25:43<2:40:32, 33.22it/s]global step 680000, trans_decision ep_re 48.07384076263362

{"global_step": 680000, "eval_re": [9.504691107157244, 107.46644336554775, 
163.6201967507817, 14.191067283659963, 9.487742423221238, 9.709324277636236, 
13.723242515668032, 8.361924895425961, 22.717392070502743, 121.95638293673525], 
"eval_len": [14, 67, 92, 19, 17, 14, 15, 17, 40, 65]}

 69%|██████▉   | 689999/1000000 [7:32:18<2:35:30, 33.22it/s]global step 690000, trans_decision ep_re 23.891790759665003

{"global_step": 690000, "eval_re": [35.9113837944695, 17.512367394412824, 
59.151167736890415, 14.196486392349549, 10.750071492093785, 28.2716827506335, 
16.63844179995545, 19.46810210933253, 13.988864761794785, 23.02933936471767], 
"eval_len": [40, 19, 53, 16, 18, 40, 21, 43, 18, 32]}

 70%|██████▉   | 699999/1000000 [7:38:53<2:31:37, 32.98it/s]global step 700000, trans_decision ep_re 59.24082949284567

{"global_step": 700000, "eval_re": [9.074942911251979, 26.5963253873143, 
10.844940303627867, 14.000925950694695, 93.99276810804055, 238.9034077726572, 
20.8142584011037, 128.25005012712464, 30.836821898282693, 19.093854068359185], 
"eval_len": [17, 53, 15, 17, 57, 115, 47, 78, 31, 18]}

 71%|███████   | 709999/1000000 [7:45:29<2:26:53, 32.90it/s]global step 710000, trans_decision ep_re 81.85105058315533

{"global_step": 710000, "eval_re": [329.86614601633937, 109.26160880086063, 
22.99566188484789, 10.274663981575149, 71.88418093067978, 8.327113315246166, 
10.652191012978488, 9.34782601069143, 85.92917044973441, 159.9719434285999], 
"eval_len": [143, 69, 23, 15, 46, 14, 15, 12, 58, 81]}

 72%|███████▏  | 719999/1000000 [7:52:05<2:21:35, 32.96it/s]global step 720000, trans_decision ep_re 66.23831195698727

{"global_step": 720000, "eval_re": [115.25328394321903, 96.58623516570302, 
70.32205587957905, 96.3436509342422, 10.164494673982826, 11.059819069431796, 
15.056848853688217, 103.35694840617758, 134.26135333828537, 9.978429305563655], 
"eval_len": [80, 74, 65, 71, 16, 19, 30, 64, 67, 25]}

 73%|███████▎  | 729999/1000000 [7:58:41<2:16:38, 32.93it/s]global step 730000, trans_decision ep_re 42.86638742991709

{"global_step": 730000, "eval_re": [35.6654054802245, 28.455537365632924, 
89.8626868742918, 69.96070987159906, 7.518137225491041, 10.870343056957147, 
21.336727570648335, 29.667906794514657, 40.62978657905324, 94.69663348075817], 
"eval_len": [40, 33, 65, 69, 13, 15, 36, 39, 33, 52]}

 74%|███████▍  | 739999/1000000 [8:05:17<2:11:06, 33.05it/s]global step 740000, trans_decision ep_re 61.220218467567676

{"global_step": 740000, "eval_re": [80.25914471549675, 161.05576428023818, 
19.318035663646242, 14.63042416756968, 16.031153528240207, 15.727537534384366, 
71.79790276108879, 106.02134866479237, 9.872693029806856, 117.4881803304134], 
"eval_len": [70, 78, 27, 18, 18, 17, 52, 59, 16, 61]}

 75%|███████▍  | 749999/1000000 [8:11:52<2:06:43, 32.88it/s]global step 750000, trans_decision ep_re 71.81837887555955

{"global_step": 750000, "eval_re": [10.564470633556809, 73.54569073458862, 
130.45093115576773, 12.386302086205584, 76.24976158434467, 11.941506393438495, 
147.00719214627145, 137.23735408695512, 104.06384917965083, 14.736730754816225],
"eval_len": [19, 48, 69, 16, 72, 13, 72, 79, 80, 16]}

 76%|███████▌  | 759999/1000000 [8:18:28<2:01:38, 32.88it/s]global step 760000, trans_decision ep_re 67.81926085228483

{"global_step": 760000, "eval_re": [126.52145431083095, 47.84978386281103, 
32.843916378422826, 11.522055291855166, 12.90850216405719, 119.9933920309529, 
73.1450553857204, 49.38368897455081, 148.97528308214456, 55.04947704150253], 
"eval_len": [72, 33, 31, 16, 16, 90, 72, 43, 88, 47]}

 77%|███████▋  | 769999/1000000 [8:25:04<1:56:03, 33.03it/s]global step 770000, trans_decision ep_re 42.81759152648711

{"global_step": 770000, "eval_re": [9.282636581804203, 19.776947200901247, 
17.2282323077466, 5.2315649065634275, 14.724671251660736, 212.16904392236492, 
10.372614530447844, 20.550045128454354, 104.50505320293881, 14.335106231988865],
"eval_len": [22, 32, 22, 24, 22, 98, 13, 18, 55, 21]}

 78%|███████▊  | 779999/1000000 [8:31:40<1:51:24, 32.91it/s]global step 780000, trans_decision ep_re 48.428173653456916

{"global_step": 780000, "eval_re": [152.34310045363523, 27.134957217005315, 
72.10027533192147, 14.600997349930985, 91.75868472068886, 8.080542893062429, 
10.184073837386652, 13.011374829020214, 73.04432337883956, 22.02340652307846], 
"eval_len": [93, 27, 48, 16, 59, 12, 14, 16, 85, 22]}

 79%|███████▉  | 789999/1000000 [8:38:16<1:46:10, 32.96it/s]global step 790000, trans_decision ep_re 70.1332999065719

{"global_step": 790000, "eval_re": [19.53464201792653, 67.1519864766267, 
12.112055207132336, 153.49449715041118, 74.28046765058093, 33.10794459930764, 
199.77870473362887, 84.80073053426023, 46.91141508800873, 10.16055560783593], 
"eval_len": [31, 59, 15, 88, 80, 42, 123, 96, 55, 14]}

 80%|███████▉  | 799998/1000000 [8:44:52<1:41:10, 32.94it/s]global step 800000, trans_decision ep_re 38.955775099982716

{"global_step": 800000, "eval_re": [11.155405063114689, 20.436690703418538, 
60.13539501797546, 12.656378688522175, 10.34760257789532, 115.75485839128956, 
34.597073727924354, 15.908840476402341, 7.2179893147088645, 101.34751703857583],
"eval_len": [15, 23, 54, 21, 16, 76, 29, 17, 24, 80]}

 81%|████████  | 809998/1000000 [8:51:28<1:36:14, 32.90it/s]global step 810000, trans_decision ep_re 55.99894820262242

{"global_step": 810000, "eval_re": [12.943815141956955, 13.349056348496395, 
134.2953921254756, 84.33838083899477, 7.948686658610855, 10.578267143026258, 
12.20334093438567, 92.86936092886366, 96.60422692267736, 94.85895498373671], 
"eval_len": [16, 15, 79, 53, 12, 17, 19, 90, 69, 77]}

 82%|████████▏ | 819998/1000000 [8:58:04<1:30:56, 32.99it/s]global step 820000, trans_decision ep_re 44.98539859800336

{"global_step": 820000, "eval_re": [19.437483804392713, 54.88187298247201, 
17.0067037301351, 14.43282076937401, 21.473875143436462, 13.932418172428655, 
12.113417891432313, 126.616683873276, 150.88709555093723, 19.07161406214906], 
"eval_len": [19, 49, 18, 18, 23, 21, 19, 67, 80, 18]}

 83%|████████▎ | 829998/1000000 [9:04:38<1:25:49, 33.01it/s]global step 830000, trans_decision ep_re 41.70852282216812

{"global_step": 830000, "eval_re": [38.536139499289746, 10.823334735261408, 
29.977387027226122, 7.801834761213367, 17.785383020236186, 19.422964797862317, 
151.17382279777172, 14.911618161699694, 13.930682930636632, 112.722060490484], 
"eval_len": [45, 17, 31, 16, 18, 25, 71, 15, 15, 73]}

 84%|████████▍ | 839997/1000000 [9:11:14<1:21:32, 32.70it/s]global step 840000, trans_decision ep_re 60.35315875583889

{"global_step": 840000, "eval_re": [60.939113427872556, 59.99300808213702, 
12.581403868754796, 82.54919089884521, 100.45862000487814, 115.9544936146415, 
11.083640834272506, 16.112773016287107, 83.79586646359854, 60.06347734710137], 
"eval_len": [67, 46, 18, 53, 59, 81, 18, 18, 58, 57]}

 85%|████████▍ | 849997/1000000 [9:17:50<1:15:55, 32.93it/s]global step 850000, trans_decision ep_re 43.601529832043084

{"global_step": 850000, "eval_re": [73.44802872640152, 9.08776285047624, 
119.61669958249529, 12.355189514286591, 16.9994321767014, 7.169374957776285, 
15.786293107534707, 49.97111381749899, 117.69943814409567, 13.88196544316422], 
"eval_len": [56, 16, 86, 15, 19, 11, 20, 56, 65, 17]}

 86%|████████▌ | 859997/1000000 [9:24:24<1:10:51, 32.93it/s]global step 860000, trans_decision ep_re 34.199253507532845

{"global_step": 860000, "eval_re": [43.4239706473925, 15.97872028813172, 
81.90734204869881, 88.8819831218764, 8.552441742906376, 16.55745139756223, 
44.39476032005366, 12.449767550828753, 10.145060309764007, 19.701037648114006], 
"eval_len": [51, 17, 51, 58, 16, 19, 39, 14, 13, 31]}

 87%|████████▋ | 869997/1000000 [9:30:59<1:05:59, 32.84it/s]global step 870000, trans_decision ep_re 47.81587841605359

{"global_step": 870000, "eval_re": [44.05468546558629, 84.37795360889714, 
87.17688906035886, 7.75749524993902, 127.72968442618581, 72.78383623040129, 
16.090804301760606, 16.33075765370641, 13.884342080534353, 7.972336083166171], 
"eval_len": [35, 75, 53, 18, 65, 66, 19, 17, 16, 15]}

 88%|████████▊ | 879997/1000000 [9:37:33<1:00:47, 32.90it/s]global step 880000, trans_decision ep_re 36.93181416049816

{"global_step": 880000, "eval_re": [13.641190301330468, 38.49349879993805, 
74.38938189107516, 11.50074511587368, 10.124639226710809, 18.882590419280067, 
8.44355235983711, 10.259976608874753, 9.01995003165735, 174.56261685040423], 
"eval_len": [16, 31, 79, 19, 15, 19, 17, 13, 11, 108]}

 89%|████████▉ | 889997/1000000 [9:44:07<55:34, 32.99it/s]global step 890000, trans_decision ep_re 33.64565795547216

{"global_step": 890000, "eval_re": [166.36513624468157, 18.573388424362975, 
11.793819725500976, 71.91615085102573, 10.471651068030882, 8.694255995970328, 
13.233060462494677, 13.734899831141467, 9.558083673479082, 12.116133278033887], 
"eval_len": [94, 22, 18, 52, 13, 16, 17, 15, 17, 20]}

 90%|████████▉ | 899997/1000000 [9:50:43<50:42, 32.87it/s]global step 900000, trans_decision ep_re 32.96703369248497

{"global_step": 900000, "eval_re": [9.943584079273037, 19.960026143681986, 
14.750853280662293, 12.718245065816516, 18.20230923747096, 14.096575918520607, 
80.57089039356191, 128.77466250379575, 16.44711310263883, 14.206077199427819], 
"eval_len": [19, 34, 19, 16, 17, 29, 53, 78, 16, 34]}

 91%|█████████ | 909997/1000000 [9:57:19<45:27, 32.99it/s]global step 910000, trans_decision ep_re 78.40436651818834

{"global_step": 910000, "eval_re": [196.70593365630782, 10.394583074609848, 
138.1542959669238, 137.41093740986997, 9.368354635563332, 142.5090924201927, 
51.486374708336086, 50.15434907385494, 6.529474914610992, 41.33026932161389], 
"eval_len": [118, 13, 80, 91, 18, 92, 61, 43, 9, 43]}

 92%|█████████▏| 919997/1000000 [10:03:55<40:34, 32.86it/s]global step 920000, trans_decision ep_re 73.27883510603166

{"global_step": 920000, "eval_re": [275.14917529670265, 115.44457173918323, 
67.27059637713643, 10.788979513986956, 49.340658237724966, 9.553343830357699, 
51.520483120096586, 62.931751188680266, 79.02615514290584, 11.762636613541913], 
"eval_len": [126, 60, 52, 13, 54, 15, 59, 53, 62, 17]}

 93%|█████████▎| 929997/1000000 [10:10:31<35:18, 33.04it/s]global step 930000, trans_decision ep_re 49.828242120555785

{"global_step": 930000, "eval_re": [10.49569673305071, 58.13803183107267, 
54.6669869971581, 12.399712485895398, 70.67213787814426, 53.2568161829557, 
194.1831983585429, 11.979765086882274, 18.78718919505755, 13.70288645679829], 
"eval_len": [17, 58, 47, 14, 61, 49, 94, 13, 30, 27]}

 94%|█████████▍| 939997/1000000 [10:17:08<30:21, 32.95it/s]global step 940000, trans_decision ep_re 37.98422608965562

{"global_step": 940000, "eval_re": [16.793610521957863, 67.96631504653902, 
10.259457794127396, 79.98305883102935, 15.902762183302025, 64.62912536055762, 
11.379623270845281, 15.635332098275075, 85.77864825743904, 11.514327532483572], 
"eval_len": [17, 71, 14, 46, 25, 43, 13, 18, 58, 14]}

 95%|█████████▍| 949997/1000000 [10:23:43<25:17, 32.95it/s]global step 950000, trans_decision ep_re 118.96363916315734

{"global_step": 950000, "eval_re": [183.65784500181493, 216.79622549176855, 
57.74603362039279, 223.53065688455524, 164.2583531265974, 24.127097450979676, 
101.43434547242605, 20.74141632784672, 187.43923029161064, 9.905187963581588], 
"eval_len": [85, 137, 51, 133, 96, 24, 58, 24, 123, 12]}

 96%|█████████▌| 959996/1000000 [10:30:19<20:17, 32.85it/s]global step 960000, trans_decision ep_re 103.66695195683806

{"global_step": 960000, "eval_re": [333.74195764279136, 8.520078268163246, 
78.92873656244564, 11.439559987827465, 11.77147391553941, 41.63305387912903, 
109.88960292997176, 201.58621960621377, 156.7089925893299, 82.44984418696895], 
"eval_len": [129, 13, 50, 13, 19, 54, 60, 109, 87, 67]}

 97%|█████████▋| 969996/1000000 [10:36:54<15:01, 33.30it/s]global step 970000, trans_decision ep_re 68.68200419275232

{"global_step": 970000, "eval_re": [137.01090965760582, 58.91952230810981, 
106.87566386287347, 8.236107196885726, 12.932284453115553, 191.38839730724786, 
121.17863264555086, 14.845601771589827, 18.04045757637088, 17.392465148173425], 
"eval_len": [94, 59, 57, 10, 18, 97, 71, 29, 19, 23]}

 98%|█████████▊| 979996/1000000 [10:43:40<10:10, 32.78it/s]global step 980000, trans_decision ep_re 83.30542432060464

{"global_step": 980000, "eval_re": [113.91255827972776, 414.7080116959356, 
12.754077814601759, 47.87602284890652, 11.802117115482726, 9.025755638473091, 
82.45566750236458, 10.515181081961202, 15.838545089362853, 114.16630613923039], 
"eval_len": [62, 190, 17, 40, 16, 11, 72, 25, 21, 60]}

 99%|█████████▉| 989996/1000000 [10:50:04<05:04, 32.85it/s]global step 990000, trans_decision ep_re 65.50473903475084

{"global_step": 990000, "eval_re": [18.477512719930854, 196.63872345444693, 
13.983893711796611, 13.885017255559653, 11.383734941284951, 57.47384487390003, 
113.96396675357191, 7.918579697027175, 204.73103880340767, 16.591078136582762], 
"eval_len": [29, 103, 17, 16, 16, 58, 80, 17, 118, 19]}

100%|█████████▉| 999996/1000000 [10:56:50<00:00, 32.91it/s]global step 1000000, trans_decision ep_re 83.64321509493834

{"global_step": 1000000, "eval_re": [178.72190811716317, 41.445990499896936, 
230.84021670306385, 95.82453965467329, 12.436996691297871, 131.21068406823153, 
15.553868405421106, 102.32024231859455, 18.279215748739716, 9.798488742301451], 
"eval_len": [93, 35, 222, 90, 23, 107, 16, 82, 18, 14]}

100%|██████████| 1000000/1000000 [10:56:50<00:00, 25.37it/s]
