
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.05
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9998/1000000 [05:30<12:43:05, 21.62it/s]global step 10000, trans_decision ep_re 84.37435665740763

{"global_step": 10000, "eval_re": [71.3667675048156, 98.76684918683968, 
75.8883563712014, 78.7750437356847, 127.26027659349732, 91.8708056061848, 
66.03609789262767, 77.2601102797095, 71.16463554334011, 85.35462386017564], 
"eval_len": [57, 74, 60, 61, 102, 69, 55, 62, 58, 64]}

  2%|▏         | 19998/1000000 [16:00<12:41:27, 21.45it/s]global step 20000, trans_decision ep_re 91.10475371812404

{"global_step": 20000, "eval_re": [75.19071595861153, 156.46662776926973, 
168.8597174894102, 25.636964106850375, 81.1999735718296, 95.90147473976818, 
79.30897506722937, 120.3448909559734, 25.346652277935213, 82.79154524436292], 
"eval_len": [60, 100, 101, 27, 64, 71, 58, 85, 26, 62]}

  3%|▎         | 29999/1000000 [26:30<12:44:20, 21.15it/s]global step 30000, trans_decision ep_re 87.2263307633178

{"global_step": 30000, "eval_re": [31.74726317593, 100.07307695620189, 
189.07489631338444, 33.4455110987347, 30.68112428394155, 28.79935978185691, 
150.49812260437534, 28.702691949924375, 91.27950333780721, 187.9617581310216], 
"eval_len": [38, 78, 119, 40, 32, 34, 103, 31, 69, 113]}

  4%|▍         | 39999/1000000 [37:10<12:30:45, 21.31it/s]global step 40000, trans_decision ep_re 84.49293199538451

{"global_step": 40000, "eval_re": [73.26661720915683, 26.00541443030498, 
30.092780225774824, 102.54616647076091, 145.32479853285207, 176.79538113120506, 
31.220313223762854, 23.455532070725464, 122.6217452033727, 113.6005714559293], 
"eval_len": [54, 33, 35, 81, 136, 119, 36, 32, 80, 77]}

  5%|▍         | 49999/1000000 [47:40<12:34:34, 20.98it/s]global step 50000, trans_decision ep_re 60.666813580521136

{"global_step": 50000, "eval_re": [32.92454638066756, 121.42371641853084, 
100.56390580443039, 88.81377540580598, 30.213149989301666, 25.522609955086804, 
73.86975864817548, 27.703908568821745, 25.90206549205909, 79.73069914233176], 
"eval_len": [33, 86, 81, 62, 34, 29, 58, 34, 31, 59]}

  6%|▌         | 59999/1000000 [58:20<12:12:50, 21.38it/s]global step 60000, trans_decision ep_re 75.75385676347398

{"global_step": 60000, "eval_re": [88.3409781659955, 79.24227836976472, 
30.6332659331025, 26.218978188203664, 83.48145854649864, 108.62572472329607, 
90.3267331421124, 83.58725006464701, 77.47711273067299, 89.60478777044634], 
"eval_len": [73, 58, 32, 31, 63, 78, 63, 57, 59, 70]}

  7%|▋         | 69999/1000000 [1:09:00<12:15:41, 21.07it/s]global step 70000, trans_decision ep_re 84.89059873551467

{"global_step": 70000, "eval_re": [79.98050801603532, 25.02422837583153, 
122.7221683366723, 85.45699391284212, 80.70426831454803, 123.10497550045713, 
33.16144851567489, 117.9391565389304, 84.47064479046922, 96.3415950536858], 
"eval_len": [60, 33, 81, 59, 58, 80, 40, 78, 63, 73]}

  8%|▊         | 79997/1000000 [1:19:30<11:53:06, 21.50it/s]global step 80000, trans_decision ep_re 86.92253756525058

{"global_step": 80000, "eval_re": [74.34551840169956, 125.81926450110512, 
72.31088703964367, 174.9238330779329, 29.930772886360057, 70.26824124359494, 
28.560724917845878, 74.28896243613279, 191.3404186878409, 27.436752460350096], 
"eval_len": [57, 88, 58, 115, 38, 60, 40, 58, 120, 29]}

  9%|▉         | 89999/1000000 [1:30:00<11:58:56, 21.10it/s]global step 90000, trans_decision ep_re 98.80147155854112

{"global_step": 90000, "eval_re": [58.15164708734388, 146.1395373418483, 
99.78843526484599, 53.17664824354559, 97.82237048286837, 149.64631198308933, 
81.32895607345871, 125.12939982392625, 81.90970153481554, 94.92170774966914], 
"eval_len": [52, 105, 78, 48, 72, 89, 69, 86, 59, 68]}

 10%|▉         | 99999/1000000 [1:40:40<11:50:41, 21.11it/s]global step 100000, trans_decision ep_re 105.9568224439012

{"global_step": 100000, "eval_re": [21.247816121726462, 35.02432715388148, 
41.0352265996406, 124.16833095548407, 134.79465323046287, 90.24053144131173, 
321.8750696738042, 171.58300842644266, 35.37884838152638, 84.22041245473163], 
"eval_len": [25, 37, 42, 77, 90, 72, 160, 102, 37, 66]}

 11%|█         | 109999/1000000 [1:51:10<11:45:10, 21.04it/s]global step 110000, trans_decision ep_re 101.54125546736898

{"global_step": 110000, "eval_re": [81.32638592933613, 125.34475310061718, 
115.33766326272392, 101.07565141265145, 111.36832942596848, 84.54990711379197, 
65.7720978970454, 123.70059570296033, 131.63426259026514, 75.30290823832989], 
"eval_len": [61, 86, 83, 70, 74, 63, 51, 107, 88, 57]}

 12%|█▏        | 119999/1000000 [2:01:40<11:35:48, 21.08it/s]global step 120000, trans_decision ep_re 77.86482946976261

{"global_step": 120000, "eval_re": [102.94647826276179, 28.17565803206083, 
21.913695831056796, 33.0552373530619, 39.332905410055645, 117.29725189939987, 
193.78858627336146, 30.884676092576267, 113.01309105325585, 98.24071449003564], 
"eval_len": [76, 33, 25, 37, 38, 76, 128, 36, 75, 67]}

 13%|█▎        | 129999/1000000 [2:12:10<11:23:33, 21.21it/s]global step 130000, trans_decision ep_re 115.84138927056324

{"global_step": 130000, "eval_re": [181.60893995056892, 96.67259844945313, 
230.52916746507194, 75.42917306877098, 88.2074703182642, 81.65580366009047, 
131.41506908432427, 79.16214970825978, 170.97308869949129, 22.76043230133736], 
"eval_len": [120, 68, 132, 56, 64, 62, 92, 56, 117, 28]}

 14%|█▍        | 139999/1000000 [2:22:40<11:14:18, 21.26it/s]global step 140000, trans_decision ep_re 150.33602629705916

{"global_step": 140000, "eval_re": [81.79945756876577, 145.45064397586566, 
106.17698073540977, 77.06849958189237, 34.2099310615999, 179.06457767773864, 
321.9772831623294, 61.21431348520488, 439.47365097037493, 56.924924751410316], 
"eval_len": [63, 91, 74, 59, 38, 117, 158, 50, 180, 49]}

 15%|█▍        | 149998/1000000 [2:33:20<11:03:32, 21.35it/s]global step 150000, trans_decision ep_re 70.06705915001089

{"global_step": 150000, "eval_re": [26.47366739387098, 60.868507506610136, 
127.75599384142396, 57.46669533581328, 178.27296473163435, 34.48191750615392, 
101.94333510435845, 56.70559007678219, 38.89369126714901, 17.808228736312717], 
"eval_len": [33, 52, 82, 53, 119, 38, 75, 49, 37, 20]}

 16%|█▌        | 159999/1000000 [2:43:50<10:58:38, 21.26it/s]global step 160000, trans_decision ep_re 143.99685008714317

{"global_step": 160000, "eval_re": [59.58549167750659, 69.00991097140766, 
31.723545287061746, 738.1983244758045, 65.33492194470179, 112.12421923877487, 
93.07784024049138, 120.96204374694969, 35.9637980690557, 113.98840521967772], 
"eval_len": [53, 54, 36, 331, 54, 86, 85, 86, 40, 85]}

 17%|█▋        | 169999/1000000 [2:54:30<10:54:00, 21.15it/s]global step 170000, trans_decision ep_re 103.7023416127029

{"global_step": 170000, "eval_re": [66.53889979015871, 30.806967657282364, 
164.85429354507963, 32.422547374632195, 56.77711158413303, 170.16249782547789, 
39.19597445287444, 211.7062708922601, 167.24501749097254, 97.3138355141579], 
"eval_len": [54, 37, 101, 38, 51, 113, 37, 121, 101, 70]}

 18%|█▊        | 179999/1000000 [3:05:00<10:42:24, 21.27it/s]global step 180000, trans_decision ep_re 153.5905715693518

{"global_step": 180000, "eval_re": [124.7222283422218, 105.83901401299175, 
98.13763644855038, 171.2039548500664, 294.900604599935, 136.48938038573982, 
30.28280331411833, 224.01120153853827, 107.7584669994851, 242.5604252018711], 
"eval_len": [85, 74, 72, 110, 157, 94, 30, 139, 75, 141]}

 19%|█▉        | 189999/1000000 [3:15:40<10:36:52, 21.20it/s]global step 190000, trans_decision ep_re 74.70149126648978

{"global_step": 190000, "eval_re": [30.196887143540856, 82.57636251921237, 
145.90791634962395, 141.3451674569631, 32.05427438956675, 98.93000001809908, 
114.23954750550074, 32.55256933460834, 29.686122674021593, 39.52606527376115], 
"eval_len": [36, 57, 103, 92, 34, 81, 87, 35, 31, 39]}

 20%|█▉        | 199998/1000000 [3:26:10<10:19:30, 21.52it/s]global step 200000, trans_decision ep_re 173.47734123445167

{"global_step": 200000, "eval_re": [212.53509155710194, 194.98594300136858, 
157.63080580241294, 114.47422141925723, 141.95447245684787, 167.34377815862348, 
539.5309391975762, 57.747940415846344, 119.54802206513774, 29.022198270344386], 
"eval_len": [112, 112, 102, 91, 103, 103, 228, 52, 87, 30]}

 21%|██        | 209999/1000000 [3:36:40<10:22:43, 21.14it/s]global step 210000, trans_decision ep_re 117.27553373451549

{"global_step": 210000, "eval_re": [53.56441815513362, 141.80356794618643, 
192.5214993972708, 147.9349275326802, 131.7861350503178, 109.42042754070603, 
31.519262110649215, 72.27211733534706, 79.13487312420587, 212.79810915265767], 
"eval_len": [52, 92, 106, 92, 77, 83, 36, 61, 56, 126]}

 22%|██▏       | 219999/1000000 [3:47:10<10:11:52, 21.25it/s]global step 220000, trans_decision ep_re 70.02489533613064

{"global_step": 220000, "eval_re": [80.76762324191957, 83.48788640788186, 
108.2064854388966, 28.304606904357165, 39.66767502616516, 97.25096420616366, 
94.97447604558529, 104.78875710409503, 34.08205502388492, 28.718423962357175], 
"eval_len": [64, 67, 70, 33, 40, 74, 64, 85, 38, 32]}

 23%|██▎       | 229999/1000000 [3:57:30<10:03:40, 21.26it/s]global step 230000, trans_decision ep_re 127.80576497633633

{"global_step": 230000, "eval_re": [158.02619042141302, 110.68542844099812, 
115.59047192385233, 375.8357132062152, 87.8287948271502, 102.385579091612, 
30.663664025222, 99.19965494446606, 55.24501540637095, 142.5971374760635], 
"eval_len": [102, 70, 73, 159, 71, 73, 34, 77, 56, 92]}

 24%|██▍       | 239999/1000000 [4:08:20<9:54:37, 21.30it/s]global step 240000, trans_decision ep_re 183.63639411860268

{"global_step": 240000, "eval_re": [248.5228419181927, 96.99487684236446, 
300.1506349123183, 292.56629474928855, 183.31658606121258, 212.00328419616318, 
230.18718095699361, 27.19972812896781, 176.93817484314235, 68.48433857738306], 
"eval_len": [124, 70, 144, 136, 104, 118, 117, 32, 107, 48]}

 25%|██▍       | 249997/1000000 [4:18:50<9:45:08, 21.36it/s]global step 250000, trans_decision ep_re 140.11628495003117

{"global_step": 250000, "eval_re": [163.6739521935193, 346.96581232462944, 
172.96773237765314, 29.693163720581154, 177.3522104290124, 117.53154477106365, 
26.926197209457563, 174.779474998252, 159.244165747165, 32.02859572897818], 
"eval_len": [112, 179, 118, 33, 111, 79, 34, 112, 99, 35]}

 26%|██▌       | 259999/1000000 [4:29:20<9:43:59, 21.12it/s]global step 260000, trans_decision ep_re 133.58342491954775

{"global_step": 260000, "eval_re": [156.64174941379557, 27.318381100505505, 
185.8488484265132, 154.20125104176813, 127.20424889735047, 122.87743359484257, 
177.15922177846966, 125.13316265340139, 100.8972520071171, 158.55270028171407], 
"eval_len": [105, 29, 116, 95, 104, 83, 113, 83, 72, 102]}

 27%|██▋       | 269999/1000000 [4:40:00<9:32:39, 21.25it/s]global step 270000, trans_decision ep_re 142.40523579439855

{"global_step": 270000, "eval_re": [118.98983248832425, 452.7593187613302, 
165.34051799908315, 111.57419482897888, 80.39596082246442, 34.52051863360835, 
228.7647173107098, 33.05756462402429, 96.5274297036425, 102.12230277181982], 
"eval_len": [79, 185, 107, 80, 60, 38, 125, 36, 64, 67]}

 28%|██▊       | 279998/1000000 [4:50:30<9:16:44, 21.55it/s]global step 280000, trans_decision ep_re 167.5952259030716

{"global_step": 280000, "eval_re": [138.917689211835, 31.104118043010306, 
316.5863482250824, 122.92538245647984, 123.82743087091838, 113.61233956591072, 
187.77523054640736, 319.311436372007, 160.73526205868976, 161.1570216803751], 
"eval_len": [87, 33, 151, 96, 90, 79, 129, 147, 101, 101]}

 29%|██▉       | 289999/1000000 [5:01:00<9:15:50, 21.29it/s]global step 290000, trans_decision ep_re 127.42867905844336

{"global_step": 290000, "eval_re": [38.459272016162814, 306.73261658915203, 
66.95196109725305, 111.89547388642539, 124.45382277734747, 219.06814755000968, 
133.70357348869416, 129.9267190286167, 115.24368030692442, 27.85152384384787], 
"eval_len": [42, 157, 56, 88, 92, 122, 94, 97, 92, 30]}

 30%|██▉       | 299999/1000000 [5:11:40<9:08:37, 21.27it/s]global step 300000, trans_decision ep_re 146.19662345820967

{"global_step": 300000, "eval_re": [167.43784770354833, 520.7107079442205, 
69.0354173843671, 128.9354000375377, 112.47615156386918, 158.95573838756212, 
177.00787704532016, 28.025812418460703, 69.68626869527668, 29.69501340193404], 
"eval_len": [101, 189, 60, 89, 96, 106, 99, 33, 56, 32]}

 31%|███       | 309997/1000000 [5:22:10<9:02:16, 21.21it/s]global step 310000, trans_decision ep_re 131.48485876487274

{"global_step": 310000, "eval_re": [72.52887235415453, 101.12080840860645, 
91.61444067578996, 22.897984716019224, 113.49821641583539, 157.54368211322708, 
180.91361622168293, 310.2587151600971, 134.46463983288677, 130.00761175042797], 
"eval_len": [52, 73, 69, 27, 75, 104, 117, 166, 90, 87]}

 32%|███▏      | 319999/1000000 [5:32:30<8:52:06, 21.30it/s]global step 320000, trans_decision ep_re 105.73260446579741

{"global_step": 320000, "eval_re": [66.52131522588722, 123.33897734853352, 
95.68714694676282, 133.90125252433526, 121.16323435609671, 175.85347205717585, 
22.300616127142046, 127.72941346068912, 32.456669004314605, 158.373947607037], 
"eval_len": [51, 85, 83, 89, 88, 120, 34, 101, 36, 100]}

 33%|███▎      | 329998/1000000 [5:43:20<8:40:03, 21.47it/s]global step 330000, trans_decision ep_re 120.84179206440172

{"global_step": 330000, "eval_re": [224.26045063120492, 27.615661385094967, 
196.7409008432925, 188.48292407208524, 118.47088145002638, 27.664021839252495, 
95.33346825250541, 132.13898981066745, 68.40995194626915, 129.30067041361875], 
"eval_len": [116, 32, 120, 103, 75, 36, 76, 85, 59, 80]}

 34%|███▍      | 339999/1000000 [5:53:50<8:36:25, 21.30it/s]global step 340000, trans_decision ep_re 112.25983900549667

{"global_step": 340000, "eval_re": [23.47747850341024, 92.21870386872418, 
170.53253898930467, 76.61247239741834, 79.13640699626295, 93.77626780980836, 
144.16205276680503, 88.69887427323727, 268.8293957665348, 85.1541986834609], 
"eval_len": [29, 74, 112, 50, 57, 64, 94, 63, 139, 63]}

 35%|███▍      | 349999/1000000 [6:04:10<8:25:17, 21.44it/s]global step 350000, trans_decision ep_re 126.88628991157259

{"global_step": 350000, "eval_re": [304.97997435802984, 169.63199419629697, 
70.01989619094893, 89.21233920624374, 215.44385393909323, 95.9108067009479, 
82.66549130205445, 36.75155322971166, 111.78947007309488, 92.45751991930442], 
"eval_len": [143, 108, 50, 69, 127, 70, 60, 37, 71, 71]}

 36%|███▌      | 359999/1000000 [6:15:00<8:22:30, 21.23it/s]global step 360000, trans_decision ep_re 137.0556525248788

{"global_step": 360000, "eval_re": [34.59299995591506, 233.05398185655838, 
158.70757622864147, 29.945372042226648, 138.8534640416589, 246.86392242178238, 
95.98977083174444, 194.03558641973555, 94.37426088997515, 144.13959056054995], 
"eval_len": [37, 123, 121, 35, 94, 130, 69, 131, 69, 94]}

 37%|███▋      | 369999/1000000 [6:25:30<8:14:10, 21.25it/s]global step 370000, trans_decision ep_re 125.12254883834078

{"global_step": 370000, "eval_re": [96.36674701256952, 31.154165185775852, 
109.87846132762282, 105.53596237710417, 97.42873616764454, 102.21928254027512, 
163.20522211785934, 33.812184919521954, 478.56017836784577, 33.06454836718867], 
"eval_len": [70, 34, 78, 76, 69, 75, 94, 37, 197, 39]}

 38%|███▊      | 379998/1000000 [6:36:00<8:01:45, 21.45it/s]global step 380000, trans_decision ep_re 100.21324975641372

{"global_step": 380000, "eval_re": [111.52127647127884, 28.45646833778904, 
115.8747131553974, 132.92519266125674, 28.615710694677524, 91.8765815648935, 
119.1494352140711, 75.97051475531633, 261.5472580737645, 36.195346635692154], 
"eval_len": [78, 34, 80, 87, 33, 70, 86, 61, 131, 37]}

 39%|███▉      | 389999/1000000 [6:46:30<7:57:00, 21.31it/s]global step 390000, trans_decision ep_re 100.26859608601958

{"global_step": 390000, "eval_re": [120.84024411938209, 115.50481525417644, 
133.17674359583577, 206.13026835104034, 21.054872919894226, 113.61530061611998, 
109.08532181814451, 33.03803619576819, 117.75951296070419, 32.48084502913002], 
"eval_len": [84, 78, 91, 122, 31, 76, 76, 33, 86, 35]}

 40%|███▉      | 399999/1000000 [6:57:00<7:47:22, 21.40it/s]global step 400000, trans_decision ep_re 137.49996909758357

{"global_step": 400000, "eval_re": [97.91285015774501, 373.3615948562454, 
118.12823664702398, 103.86182251487806, 99.18716753371915, 249.82055846804525, 
31.34161576423015, 56.92461330185078, 135.0802969572887, 109.3809347748092], 
"eval_len": [68, 169, 79, 70, 72, 127, 35, 56, 94, 81]}

 41%|████      | 409999/1000000 [7:07:30<7:39:00, 21.42it/s]global step 410000, trans_decision ep_re 96.53921680388606

{"global_step": 410000, "eval_re": [115.15619494788749, 165.48310262135146, 
145.9495273940663, 33.16399928887744, 108.01922348837542, 30.716002126799093, 
26.03457463596633, 189.90281461045026, 28.423231431350622, 122.54349749373625], 
"eval_len": [76, 97, 90, 41, 82, 35, 33, 108, 35, 87]}

 42%|████▏     | 419999/1000000 [7:18:00<7:32:28, 21.36it/s]global step 420000, trans_decision ep_re 112.26014937287695

{"global_step": 420000, "eval_re": [25.01276536841463, 67.22381046808334, 
161.19033400053416, 168.4398303979161, 253.57653035679314, 117.78830351409994, 
96.36492808444277, 36.41326619493554, 97.59587507316849, 98.99585027038133], 
"eval_len": [28, 50, 100, 99, 126, 83, 71, 40, 72, 81]}

 43%|████▎     | 429998/1000000 [7:28:40<7:19:43, 21.60it/s]global step 430000, trans_decision ep_re 123.53859873177248

{"global_step": 430000, "eval_re": [386.81403146238154, 71.598148538412, 
20.86991926938159, 67.07275375088227, 73.7641710603386, 70.914966121644, 
237.72003167780494, 28.196465637031913, 108.95705378167195, 169.47844601817582],
"eval_len": [159, 61, 26, 56, 61, 60, 119, 30, 78, 98]}

 44%|████▍     | 439998/1000000 [7:39:10<7:11:08, 21.65it/s]global step 440000, trans_decision ep_re 111.55594776280284

{"global_step": 440000, "eval_re": [82.81203225659992, 68.48031623275357, 
32.30099502706, 28.812948452473755, 22.831149839154946, 72.70029418931789, 
29.07732108441441, 167.78502041999442, 335.84159545326463, 274.9178046729948], 
"eval_len": [62, 51, 39, 36, 26, 54, 31, 101, 159, 126]}

 45%|████▍     | 449999/1000000 [7:49:40<7:07:23, 21.45it/s]global step 450000, trans_decision ep_re 171.41328987467597

{"global_step": 450000, "eval_re": [165.8453309607151, 179.81188212206774, 
108.8959780335218, 188.73905377463302, 188.53096219527086, 340.7417272233006, 
102.68233668851543, 148.95495373143885, 138.97280027037286, 150.9578737469232], 
"eval_len": [109, 124, 76, 111, 124, 167, 73, 89, 86, 90]}

 46%|████▌     | 459998/1000000 [8:00:10<6:53:47, 21.75it/s]global step 460000, trans_decision ep_re 89.9845667532669

{"global_step": 460000, "eval_re": [122.49827030004566, 97.56371709652672, 
95.59370423366914, 28.993192209785175, 30.953459610716987, 35.4057291106908, 
27.26134571902487, 23.3557698201911, 41.787422694183924, 396.43305673783476], 
"eval_len": [81, 69, 70, 35, 33, 37, 38, 33, 41, 158]}

 47%|████▋     | 469999/1000000 [8:10:40<6:56:48, 21.19it/s]global step 470000, trans_decision ep_re 86.143401466557

{"global_step": 470000, "eval_re": [147.8609368092892, 24.266799278545207, 
71.06026761161397, 128.971438488357, 23.831910255332026, 26.688288335911107, 
33.67947666551649, 129.2148695686402, 140.48389638145653, 135.37613127090825], 
"eval_len": [86, 30, 49, 82, 31, 31, 39, 82, 92, 87]}

 48%|████▊     | 479999/1000000 [8:21:10<6:44:28, 21.43it/s]global step 480000, trans_decision ep_re 91.5537601602123

{"global_step": 480000, "eval_re": [70.01814253481832, 73.95325694350062, 
72.82767915054656, 93.0348100196474, 110.50744460109264, 89.81645892359386, 
161.44083018071325, 36.73198509157176, 109.14426477294886, 98.0627293836898], 
"eval_len": [52, 62, 53, 68, 74, 71, 101, 42, 78, 70]}

 49%|████▉     | 489999/1000000 [8:31:40<6:37:00, 21.41it/s]global step 490000, trans_decision ep_re 165.2387517521155

{"global_step": 490000, "eval_re": [23.925473020384665, 363.13075174193904, 
142.63406696839712, 183.29642699782764, 127.84530821329939, 32.723960555862305, 
163.12294659139883, 153.7920448421185, 122.32349142699178, 339.5930471629358], 
"eval_len": [27, 157, 95, 106, 80, 33, 106, 117, 89, 158]}

 50%|████▉     | 499999/1000000 [8:42:20<6:29:32, 21.39it/s]global step 500000, trans_decision ep_re 140.2810415461977

{"global_step": 500000, "eval_re": [230.68495440914427, 215.9291743763714, 
104.10230801555744, 174.99939932821653, 141.39701193908567, 211.36667178483512, 
104.31319641675789, 67.53011086766172, 123.33173787366347, 29.15585045068331], 
"eval_len": [127, 117, 78, 112, 112, 118, 68, 54, 82, 31]}

 51%|█████     | 509999/1000000 [8:52:50<6:23:02, 21.32it/s]global step 510000, trans_decision ep_re 176.91605020922617

{"global_step": 510000, "eval_re": [162.7952233495439, 23.040248451574048, 
120.58873088657636, 134.8984652909056, 170.84060656811863, 265.271601746893, 
241.70885314708227, 107.85069824888267, 175.32212612080346, 366.84394828188147],
"eval_len": [108, 31, 90, 90, 112, 130, 128, 86, 118, 160]}

 52%|█████▏    | 519998/1000000 [9:03:20<6:08:50, 21.69it/s]global step 520000, trans_decision ep_re 86.3221435628883

{"global_step": 520000, "eval_re": [27.892865464827047, 178.8408569849711, 
107.78167535884232, 102.18778648676921, 67.68968934507673, 136.22856512959765, 
69.65319172397068, 32.962482540315854, 107.79252656935854, 32.19179602515389], 
"eval_len": [31, 104, 73, 70, 51, 89, 50, 38, 78, 35]}

 53%|█████▎    | 529998/1000000 [9:13:50<6:02:42, 21.60it/s]global step 530000, trans_decision ep_re 144.25736445555165

{"global_step": 530000, "eval_re": [285.53237460881707, 156.4542070695464, 
22.328337084131363, 107.10598981315235, 123.85674043302367, 198.51064354296986, 
104.56155685924355, 102.17652030524287, 106.81103660311851, 235.2362382362708], 
"eval_len": [137, 96, 30, 86, 80, 108, 79, 76, 73, 128]}

 54%|█████▍    | 539998/1000000 [9:24:20<5:55:10, 21.59it/s]global step 540000, trans_decision ep_re 92.83718639991109

{"global_step": 540000, "eval_re": [120.22484226112842, 28.47004982895418, 
27.61312368886749, 120.50939511166364, 157.39457777180118, 31.187093486963906, 
83.40991106838482, 161.5256895563522, 167.19413925257064, 30.843041972424526], 
"eval_len": [77, 32, 34, 77, 98, 36, 63, 99, 101, 35]}

 55%|█████▍    | 549997/1000000 [9:34:50<5:50:32, 21.40it/s]global step 550000, trans_decision ep_re 129.589707491011

{"global_step": 550000, "eval_re": [143.5042908156007, 25.041699157664635, 
117.49046877159468, 339.8451972909499, 33.46038465245045, 117.23802255921404, 
30.378310993412793, 107.55101774288188, 143.95760094395055, 237.43008198239036],
"eval_len": [95, 29, 80, 153, 41, 87, 38, 76, 87, 137]}

 56%|█████▌    | 559999/1000000 [9:45:20<5:43:27, 21.35it/s]global step 560000, trans_decision ep_re 78.11166574114121

{"global_step": 560000, "eval_re": [147.86918457410422, 164.36025031691605, 
34.18852242735067, 138.85521261933917, 30.806046878269605, 38.434030799394165, 
25.977037303789253, 27.21014676492962, 28.032433878658317, 145.38379184866093], 
"eval_len": [99, 100, 39, 97, 36, 40, 32, 33, 31, 95]}

 57%|█████▋    | 569999/1000000 [9:55:50<5:38:12, 21.19it/s]global step 570000, trans_decision ep_re 137.219864534693

{"global_step": 570000, "eval_re": [31.142491697247003, 354.45631200839273, 
33.10755510094593, 540.8015145610201, 34.64090005542121, 39.698257259312385, 
36.17276013822409, 25.58758697416202, 26.7750833326026, 249.81618421960192], 
"eval_len": [38, 146, 40, 199, 39, 44, 36, 28, 29, 122]}

 58%|█████▊    | 579999/1000000 [10:06:30<5:27:26, 21.38it/s]global step 580000, trans_decision ep_re 147.27132539876877

{"global_step": 580000, "eval_re": [115.66511785500963, 29.83696061222334, 
261.2563574766926, 150.44497080275752, 216.544517388932, 113.19530784805336, 
371.0404210247451, 31.94841160939464, 113.98593383448933, 68.79525553539034], 
"eval_len": [84, 35, 136, 94, 130, 94, 179, 33, 80, 47]}

 59%|█████▉    | 589998/1000000 [10:17:00<5:17:27, 21.53it/s]global step 590000, trans_decision ep_re 71.35300201071713

{"global_step": 590000, "eval_re": [95.54261180826686, 98.36948555162644, 
27.174867178018424, 179.9473827905304, 32.256769987329385, 95.18190108575564, 
31.739243069765315, 95.6026582069148, 27.565401019739635, 30.149699409224503], 
"eval_len": [71, 71, 28, 119, 36, 69, 35, 68, 30, 36]}

 60%|█████▉    | 599998/1000000 [10:27:30<5:09:14, 21.56it/s]global step 600000, trans_decision ep_re 131.040022391918

{"global_step": 600000, "eval_re": [144.91772491534454, 132.73325369160267, 
23.668889465845904, 99.6266650422754, 152.31372750532725, 131.6718995173454, 
113.79585682284211, 378.1461263251398, 28.51185502879144, 105.01422560466534], 
"eval_len": [97, 88, 25, 77, 103, 94, 76, 163, 32, 76]}

 61%|██████    | 609999/1000000 [10:38:00<5:06:11, 21.23it/s]global step 610000, trans_decision ep_re 127.04251511169988

{"global_step": 610000, "eval_re": [168.7336545728715, 100.35942664642934, 
78.17140747791896, 400.45391467848896, 114.95588615302817, 137.12109951357158, 
30.05768567733079, 25.735995979080943, 79.22909514497807, 135.60698527330047], 
"eval_len": [101, 84, 56, 195, 89, 88, 32, 33, 57, 92]}

 62%|██████▏   | 619998/1000000 [10:48:40<4:54:28, 21.51it/s]global step 620000, trans_decision ep_re 163.76370584771757

{"global_step": 620000, "eval_re": [748.1894512295925, 93.74647701194802, 
99.63523474556047, 66.43372343177491, 105.82821878636295, 69.88304029206607, 
99.13497069172635, 113.021558994264, 214.48336022343344, 27.281023070447034], 
"eval_len": [259, 77, 74, 51, 76, 52, 74, 78, 116, 33]}

 63%|██████▎   | 629999/1000000 [10:59:10<4:48:03, 21.41it/s]global step 630000, trans_decision ep_re 254.82304705212908

{"global_step": 630000, "eval_re": [135.1855629907216, 251.42530163807578, 
182.332249037275, 92.21138344616645, 571.4959121947967, 333.53971575364585, 
355.55448182102793, 452.2427061363263, 68.05934110317469, 106.18381640008069], 
"eval_len": [88, 140, 108, 66, 230, 153, 160, 193, 50, 75]}

 64%|██████▍   | 639999/1000000 [11:09:31<4:41:02, 21.35it/s]global step 640000, trans_decision ep_re 95.09370513394992

{"global_step": 640000, "eval_re": [30.62322838221743, 31.888593865785946, 
105.90496341060187, 106.0255383917059, 125.35392606758356, 190.21133251998143, 
109.78939717727968, 34.607596875705504, 200.21709366850607, 16.31538098013181], 
"eval_len": [37, 36, 72, 78, 86, 118, 75, 39, 112, 20]}

 65%|██████▍   | 649999/1000000 [11:20:20<4:35:17, 21.19it/s]global step 650000, trans_decision ep_re 99.89186122590579

{"global_step": 650000, "eval_re": [30.266302238224924, 38.39593327150253, 
550.8524147032613, 29.969202069324375, 85.32047093693153, 30.916221086568147, 
94.31634648799074, 31.808435836013423, 29.61772945078248, 77.4555561784585], 
"eval_len": [38, 40, 207, 32, 65, 37, 69, 35, 33, 54]}

 66%|██████▌   | 659998/1000000 [11:30:50<4:27:26, 21.19it/s]global step 660000, trans_decision ep_re 119.35616858287551

{"global_step": 660000, "eval_re": [107.63443955628189, 32.80422352102459, 
22.065741653157865, 327.16889603936596, 115.00331900846744, 104.34669743800727, 
33.70371013555411, 28.172065018226395, 288.38719939096063, 134.27539406770902], 
"eval_len": [78, 35, 27, 146, 81, 79, 36, 40, 144, 83]}

 67%|██████▋   | 669999/1000000 [11:41:30<4:20:51, 21.08it/s]global step 670000, trans_decision ep_re 145.61004897826996

{"global_step": 670000, "eval_re": [102.19479671019319, 167.7693846210592, 
201.08059036840422, 169.7990655645991, 237.76495378292122, 303.6113653326896, 
130.0800715585706, 77.9314677213291, 33.97465360179576, 31.89414052113752], 
"eval_len": [81, 108, 122, 105, 122, 152, 92, 54, 35, 40]}

 68%|██████▊   | 679999/1000000 [11:52:00<4:11:33, 21.20it/s]global step 680000, trans_decision ep_re 95.08569480336335

{"global_step": 680000, "eval_re": [24.750661856973256, 102.83763916257713, 
97.48204708008801, 112.33227433429671, 34.977791685849276, 87.30720072734519, 
107.24250466003441, 182.6141851380631, 105.36377784843799, 95.9488655399684], 
"eval_len": [29, 71, 67, 86, 37, 64, 81, 107, 77, 69]}

 69%|██████▉   | 689999/1000000 [12:02:40<4:00:52, 21.45it/s]global step 690000, trans_decision ep_re 127.1895072932621

{"global_step": 690000, "eval_re": [131.56032577053668, 114.1403162820806, 
96.6756838027781, 31.689489057748236, 149.26559041261382, 191.49784435638645, 
108.81839479973848, 141.21414879605004, 109.63872201233723, 197.39455764235123],
"eval_len": [83, 82, 82, 34, 96, 102, 77, 91, 80, 109]}

 70%|██████▉   | 699998/1000000 [12:13:10<3:51:44, 21.58it/s]global step 700000, trans_decision ep_re 139.283460976735

{"global_step": 700000, "eval_re": [169.54468611789258, 85.81671096255141, 
461.76212658640617, 90.51559579329167, 200.48911367739404, 102.69754884700369, 
151.55545849594782, 71.41211607599804, 32.04194779621888, 26.999305414645708], 
"eval_len": [101, 63, 190, 66, 124, 77, 95, 52, 38, 34]}

 71%|███████   | 709999/1000000 [12:23:40<3:47:31, 21.24it/s]global step 710000, trans_decision ep_re 87.12575781269683

{"global_step": 710000, "eval_re": [33.154786478344526, 150.00888050899852, 
64.75355938145711, 139.2326852987278, 26.13346071272361, 25.646472719413758, 
206.29376210621504, 28.86188073436541, 97.70027253792988, 99.47181764879276], 
"eval_len": [36, 105, 52, 91, 29, 30, 117, 34, 75, 79]}

 72%|███████▏  | 719999/1000000 [12:34:02<3:39:34, 21.25it/s]global step 720000, trans_decision ep_re 43.996238923070884

{"global_step": 720000, "eval_re": [30.965183864487045, 30.991757082826407, 
37.408543938925966, 147.21748343122565, 36.88211950641221, 38.16860087640558, 
21.460028476841476, 30.694026313120865, 35.14500920973662, 31.029636530727007], 
"eval_len": [35, 35, 41, 89, 40, 41, 25, 39, 40, 34]}

 73%|███████▎  | 729998/1000000 [12:44:50<3:29:52, 21.44it/s]global step 730000, trans_decision ep_re 123.33303807105995

{"global_step": 730000, "eval_re": [112.9469015782115, 28.994008193412576, 
183.15633570051503, 155.08881044724222, 142.79131018810156, 155.03357707322633, 
109.63193351422031, 142.37502115188684, 113.99795707122195, 89.31452579256089], 
"eval_len": [79, 32, 111, 96, 89, 94, 80, 93, 79, 69]}

 74%|███████▍  | 739998/1000000 [12:55:20<3:20:23, 21.62it/s]global step 740000, trans_decision ep_re 154.91865151800926

{"global_step": 740000, "eval_re": [32.094403034566966, 153.24061031209592, 
177.53415045298252, 240.7580799789568, 139.36791250681793, 73.75887435591734, 
204.60173751885588, 176.2350002844222, 151.72063935353106, 199.87510738194567], 
"eval_len": [34, 104, 104, 134, 95, 59, 120, 100, 97, 110]}

 75%|███████▍  | 749997/1000000 [13:05:50<3:16:01, 21.26it/s]global step 750000, trans_decision ep_re 124.07672738382448

{"global_step": 750000, "eval_re": [177.89129920741559, 34.14457820934545, 
93.59366008065248, 25.400165891739356, 83.70170727963455, 23.366109521034318, 
435.19087405058497, 234.74820459355297, 39.670249911895574, 93.06042509238956], 
"eval_len": [101, 41, 69, 26, 70, 27, 163, 126, 38, 68]}

 76%|███████▌  | 759999/1000000 [13:16:20<3:06:19, 21.47it/s]global step 760000, trans_decision ep_re 82.06993350758685

{"global_step": 760000, "eval_re": [74.82856305805845, 77.06491926191488, 
102.57766854096774, 97.27835813084502, 112.52154149320195, 23.715952459721297, 
87.62480371678609, 25.95050248716881, 106.03600870750051, 113.10101721970373], 
"eval_len": [56, 56, 71, 76, 78, 27, 65, 30, 81, 77]}

 77%|███████▋  | 769999/1000000 [13:26:50<3:00:01, 21.29it/s]global step 770000, trans_decision ep_re 143.590706413583

{"global_step": 770000, "eval_re": [31.11926078250043, 73.60564366294668, 
28.49367890544775, 28.252125550402866, 466.88924277513286, 280.26915614744036, 
136.89363079690466, 205.95716763790057, 29.486075160860008, 154.94108271629378],
"eval_len": [36, 55, 30, 31, 198, 143, 86, 116, 37, 92]}

 78%|███████▊  | 779999/1000000 [13:37:11<2:50:31, 21.50it/s]global step 780000, trans_decision ep_re 98.99414206858887

{"global_step": 780000, "eval_re": [31.118173670702532, 27.84540227774805, 
147.75991832029882, 147.4927540972247, 31.466904251740633, 265.8799181171681, 
102.03324805602686, 29.480108938908316, 171.10371088189888, 35.76128207417187], 
"eval_len": [34, 31, 91, 96, 39, 130, 73, 37, 101, 35]}

 79%|███████▉  | 789999/1000000 [13:48:00<2:43:19, 21.43it/s]global step 790000, trans_decision ep_re 105.91302455854381

{"global_step": 790000, "eval_re": [206.18984608602833, 171.4288490387033, 
111.99813339263387, 63.698654926439445, 24.242364625705363, 167.79456519965035, 
76.0767931301541, 30.108967294585145, 91.6024615631631, 115.98961032837526], 
"eval_len": [105, 103, 78, 64, 31, 104, 64, 35, 71, 85]}

 80%|███████▉  | 799998/1000000 [13:58:30<2:34:59, 21.51it/s]global step 800000, trans_decision ep_re 139.0789302328698

{"global_step": 800000, "eval_re": [167.61576806973963, 197.95759512442865, 
216.56355776096774, 171.30791127823986, 163.32850429861213, 29.547618144899978, 
147.28680431281435, 28.913718756376213, 110.41919314028341, 157.84863144233597],
"eval_len": [102, 112, 124, 98, 101, 34, 96, 34, 75, 101]}

 81%|████████  | 809997/1000000 [14:09:00<2:28:38, 21.30it/s]global step 810000, trans_decision ep_re 146.95539935586848

{"global_step": 810000, "eval_re": [294.56545474018355, 138.91463869967586, 
157.2657973635221, 190.72311023152525, 147.52355888330098, 131.66335048075308, 
71.00515826824126, 173.91704868787727, 141.03451296588764, 22.941363237717816], 
"eval_len": [143, 96, 103, 103, 103, 86, 52, 107, 96, 27]}

 82%|████████▏ | 819999/1000000 [14:19:40<2:20:55, 21.29it/s]global step 820000, trans_decision ep_re 184.69508180890108

{"global_step": 820000, "eval_re": [268.8665418236162, 183.5051110073763, 
232.29963098348037, 136.19962373831612, 95.51023539187956, 409.15165144829194, 
228.41205195730055, 98.94601219336137, 161.55840178300866, 32.501557762379434], 
"eval_len": [129, 116, 123, 92, 73, 193, 133, 79, 104, 34]}

 83%|████████▎ | 829999/1000000 [14:30:10<2:13:10, 21.28it/s]global step 830000, trans_decision ep_re 130.29712038623174

{"global_step": 830000, "eval_re": [102.56438458966285, 33.90825038519664, 
27.934188609357093, 212.35482902110758, 171.6021253031011, 30.832783129716024, 
225.92851883506663, 183.3779039828209, 176.51435227529277, 137.95386773099588], 
"eval_len": [76, 36, 30, 123, 101, 33, 122, 117, 109, 95]}

 84%|████████▍ | 839999/1000000 [14:40:40<2:05:53, 21.18it/s]global step 840000, trans_decision ep_re 97.31992415160545

{"global_step": 840000, "eval_re": [66.79971494039118, 84.08788039450103, 
33.70981735761565, 153.63744285068566, 108.4816300643355, 120.80673003978393, 
115.85151922438934, 67.7775831030349, 155.78637866452326, 66.26054487679414], 
"eval_len": [52, 68, 40, 106, 82, 84, 84, 52, 97, 51]}

 85%|████████▍ | 849999/1000000 [14:51:21<1:57:27, 21.28it/s]global step 850000, trans_decision ep_re 149.78068658859746

{"global_step": 850000, "eval_re": [168.79978116354613, 143.43775743623388, 
148.25034562853938, 122.12101668734388, 155.20697350476985, 201.82972161334385, 
137.73864120559236, 138.7124659221689, 141.0517601172889, 140.65840260714728], 
"eval_len": [118, 90, 91, 94, 108, 117, 97, 90, 88, 93]}

 86%|████████▌ | 859998/1000000 [15:01:51<1:50:15, 21.16it/s]global step 860000, trans_decision ep_re 156.5458787427756

{"global_step": 860000, "eval_re": [71.1956385966696, 158.8564675538277, 
26.07156709707393, 281.4675017042779, 175.56350309461638, 154.59194981305333, 
408.1181241580602, 149.86612205092638, 104.78510870293314, 34.94280465631732], 
"eval_len": [53, 100, 32, 156, 111, 109, 179, 99, 77, 39]}

 87%|████████▋ | 869999/1000000 [15:12:31<1:42:26, 21.15it/s]global step 870000, trans_decision ep_re 164.7773805243035

{"global_step": 870000, "eval_re": [444.56651354926953, 78.11533593259641, 
116.70024710019302, 29.708307721042697, 146.96741031387177, 33.53250605047693, 
152.2005644242591, 147.77277621461823, 119.41513198627209, 378.79501195043537], 
"eval_len": [177, 52, 77, 29, 90, 34, 91, 93, 81, 170]}

 88%|████████▊ | 879998/1000000 [15:23:01<1:33:22, 21.42it/s]global step 880000, trans_decision ep_re 96.68545372356711

{"global_step": 880000, "eval_re": [93.69635952227091, 72.1319378765546, 
154.30348738015354, 145.45571679550832, 170.972956579594, 84.55012708922507, 
25.157115801629207, 155.51483495052278, 31.01391973434123, 34.05808150587143], 
"eval_len": [72, 51, 96, 94, 107, 68, 28, 97, 30, 35]}

 89%|████████▉ | 889998/1000000 [15:33:31<1:24:12, 21.77it/s]global step 890000, trans_decision ep_re 112.77226106099363

{"global_step": 890000, "eval_re": [109.47397293559723, 418.57889047514834, 
95.68910288918796, 42.49776549232338, 291.06921007681404, 34.621197847864885, 
36.976733320442136, 37.693253201130524, 26.990439876328658, 34.13204449509919], 
"eval_len": [70, 175, 74, 42, 143, 38, 41, 43, 30, 38]}

 90%|████████▉ | 899998/1000000 [15:44:01<1:16:07, 21.89it/s]global step 900000, trans_decision ep_re 118.64237876805484

{"global_step": 900000, "eval_re": [29.66422137062688, 26.503293210351696, 
154.49218556701715, 279.1068195288661, 29.948802106379333, 141.66022724833087, 
130.2968083325302, 258.29717696803516, 65.66795652326479, 70.78629682514637], 
"eval_len": [39, 34, 95, 138, 32, 93, 89, 128, 52, 52]}

 91%|█████████ | 909999/1000000 [15:54:12<1:09:39, 21.53it/s]global step 910000, trans_decision ep_re 78.28272048241311

{"global_step": 910000, "eval_re": [38.71317193727003, 127.05783487476373, 
27.006886892770076, 26.185605428229014, 70.31046105384216, 138.7422540260823, 
38.1158300355648, 253.1377093282348, 30.721659477950354, 32.83579176942384], 
"eval_len": [39, 84, 33, 32, 52, 93, 39, 121, 35, 38]}

 92%|█████████▏| 919999/1000000 [16:04:51<1:02:02, 21.49it/s]global step 920000, trans_decision ep_re 92.30398037520077

{"global_step": 920000, "eval_re": [30.623448642838536, 31.01380288767172, 
121.13744182273622, 30.199441381581305, 96.41945099990133, 120.66429323120363, 
264.35191249122107, 124.41198794838148, 33.8116378208897, 70.40638652558263], 
"eval_len": [37, 36, 74, 35, 74, 92, 130, 88, 38, 53]}

 93%|█████████▎| 929999/1000000 [16:15:21<54:17, 21.49it/s]global step 930000, trans_decision ep_re 126.66684338239324

{"global_step": 930000, "eval_re": [350.9857164973127, 99.69223450678972, 
36.74106501825187, 36.68961549515613, 33.68394833471251, 29.369497188217306, 
296.58074553735753, 150.3957668290048, 143.28988021961817, 89.23996419751158], 
"eval_len": [155, 77, 37, 37, 37, 38, 139, 96, 100, 65]}

 94%|█████████▍| 939997/1000000 [16:25:51<46:29, 21.51it/s]global step 940000, trans_decision ep_re 88.53422378869918

{"global_step": 940000, "eval_re": [120.01312531619351, 100.79223455175585, 
26.847063802331405, 72.11952601434831, 110.23893608266188, 84.13731965203337, 
119.14476209260991, 28.21581730983406, 72.31920294584559, 151.5142501193779], 
"eval_len": [79, 77, 30, 70, 76, 72, 79, 32, 55, 102]}

 95%|█████████▍| 949998/1000000 [16:36:21<38:22, 21.72it/s]global step 950000, trans_decision ep_re 109.10093824661826

{"global_step": 950000, "eval_re": [105.61100041568503, 69.25930482921724, 
127.07410573418565, 28.872939677677216, 327.4186937837662, 69.11489993999113, 
101.93675823610809, 71.11916532986604, 120.51103574319427, 70.09147877649184], 
"eval_len": [78, 52, 87, 32, 151, 51, 82, 53, 86, 54]}

 96%|█████████▌| 959999/1000000 [16:46:51<30:47, 21.65it/s]global step 960000, trans_decision ep_re 118.82862479199602

{"global_step": 960000, "eval_re": [202.53141606820103, 160.64268317212108, 
86.27378840713811, 160.2342806473098, 107.78132047446029, 113.05662804382413, 
28.31309494912177, 106.65227202360892, 195.98390051506416, 26.816863619110933], 
"eval_len": [108, 109, 66, 103, 83, 85, 34, 85, 123, 36]}

 97%|█████████▋| 969999/1000000 [16:57:11<23:18, 21.46it/s]global step 970000, trans_decision ep_re 126.19955283083729

{"global_step": 970000, "eval_re": [107.6292834948322, 27.453624774116783, 
116.89555799333075, 310.3342627535484, 106.7179383720102, 68.9843911448843, 
184.63144529614814, 116.21447724885155, 108.23440830263768, 114.90013892801316],
"eval_len": [72, 29, 75, 140, 77, 54, 109, 97, 93, 82]}

 98%|█████████▊| 979999/1000000 [17:07:41<15:28, 21.54it/s]global step 980000, trans_decision ep_re 130.37639270465027

{"global_step": 980000, "eval_re": [103.25778234431279, 133.96753515833137, 
70.84719126031469, 195.2177351624753, 154.63833777897594, 161.5874694212527, 
70.48839743572806, 75.1368298243971, 171.72105730499277, 166.90159135572216], 
"eval_len": [72, 103, 53, 109, 103, 95, 52, 55, 105, 104]}

 99%|█████████▉| 989997/1000000 [17:18:11<07:47, 21.41it/s]global step 990000, trans_decision ep_re 85.487506954995

{"global_step": 990000, "eval_re": [27.978542066056384, 28.31206579192629, 
70.41631550625713, 95.576378480322, 28.06939989217412, 73.37649558300748, 
33.377872643776996, 278.35275902324224, 75.2914233288902, 144.1238172342971], 
"eval_len": [30, 27, 54, 76, 32, 53, 33, 134, 56, 86]}

100%|█████████▉| 999998/1000000 [17:28:41<00:00, 21.70it/s]global step 1000000, trans_decision ep_re 122.97637185803899

{"global_step": 1000000, "eval_re": [35.98352499244795, 163.20503817307838, 
149.9514002743548, 111.3964321212905, 173.93667737882888, 156.87425513003356, 
146.44781330758246, 148.09735697043362, 26.20299029347341, 117.66822993886636], 
"eval_len": [42, 115, 96, 78, 102, 98, 96, 95, 29, 99]}

100%|██████████| 1000000/1000000 [17:28:44<00:00, 15.89it/s]
