
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.2
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9998/1000000 [06:30<14:46:48, 18.61it/s]global step 10000, trans_decision ep_re 126.19137354878558

{"global_step": 10000, "eval_re": [129.87262427425676, 84.48205159785843, 
140.9215787484457, 135.3455936329879, 161.32625133568726, 127.6729671453116, 
97.09874815838783, 124.26697749726394, 158.10703726441713, 102.81990583323909], 
"eval_len": [26, 17, 27, 26, 31, 25, 19, 24, 31, 20]}

  2%|▏         | 19998/1000000 [18:50<14:44:33, 18.46it/s]global step 20000, trans_decision ep_re 163.2361120280653

{"global_step": 20000, "eval_re": [130.67896199080934, 412.6934328093962, 
107.55987580144624, 140.91626469818613, 120.99215523572134, 165.35094753851445, 
95.44519538760433, 138.58320708361893, 173.5415031715838, 146.59957656377227], 
"eval_len": [26, 76, 21, 27, 23, 33, 19, 27, 33, 28]}

  3%|▎         | 29998/1000000 [31:10<14:41:33, 18.34it/s]global step 30000, trans_decision ep_re 160.02620994096088

{"global_step": 30000, "eval_re": [144.2382171238001, 96.75799014684877, 
101.52109444887739, 392.8241879002951, 176.01269929593605, 95.8993602007927, 
107.09648864760757, 102.57773245767537, 190.16269804329795, 193.17163114447777],
"eval_len": [29, 19, 20, 75, 34, 19, 21, 20, 36, 38]}

  4%|▍         | 39998/1000000 [43:40<14:29:12, 18.41it/s]global step 40000, trans_decision ep_re 132.6252007388454

{"global_step": 40000, "eval_re": [120.0121307517908, 89.81494984890972, 
101.6780167148456, 89.31645755332679, 89.47450542485002, 89.45831435576989, 
125.65222958777076, 119.83762853682619, 118.25242667010521, 382.7553479442587], 
"eval_len": [23, 18, 20, 18, 18, 18, 24, 23, 23, 76]}

  5%|▍         | 49998/1000000 [56:10<14:16:23, 18.49it/s]global step 50000, trans_decision ep_re 114.147099022383

{"global_step": 50000, "eval_re": [89.2925953756607, 156.82971368769162, 
124.98857351774429, 168.7938798702329, 101.37026215362972, 96.19441016619524, 
97.10667236680314, 120.13420036749025, 90.01082242961456, 96.74986028876776], 
"eval_len": [18, 30, 24, 33, 20, 19, 19, 24, 18, 19]}

  6%|▌         | 59998/1000000 [1:08:30<14:18:48, 18.24it/s]global step 60000, trans_decision ep_re 157.7861663152077

{"global_step": 60000, "eval_re": [427.37092134598083, 128.4206634069334, 
176.3346554644834, 120.02017714930255, 114.95837220428417, 133.8488744121681, 
90.25463483223974, 119.05675235915317, 140.20492291167395, 127.39168906585772], 
"eval_len": [82, 25, 34, 23, 22, 26, 18, 23, 27, 25]}

  7%|▋         | 69998/1000000 [1:21:00<13:58:19, 18.49it/s]global step 70000, trans_decision ep_re 135.79842272799118

{"global_step": 70000, "eval_re": [149.47594514603446, 139.5966877413654, 
96.0851050899815, 186.91831867963555, 89.3581631007086, 179.44200801594556, 
131.14696033653678, 130.53050101329816, 124.00203155652022, 131.4285065998854], 
"eval_len": [29, 27, 19, 37, 18, 37, 25, 25, 24, 25]}

  8%|▊         | 79998/1000000 [1:33:20<13:51:42, 18.44it/s]global step 80000, trans_decision ep_re 134.80084511079707

{"global_step": 80000, "eval_re": [149.99738186198098, 128.56891253333217, 
84.36302458667382, 149.46182610813437, 162.00889469138633, 106.50921297554338, 
102.58344588306642, 121.71268888030903, 146.5422690535388, 196.26079453400536], 
"eval_len": [29, 25, 17, 29, 32, 21, 20, 24, 28, 37]}

  9%|▉         | 89998/1000000 [1:45:50<13:51:32, 18.24it/s]global step 90000, trans_decision ep_re 134.18306075403603

{"global_step": 90000, "eval_re": [106.25812664153123, 173.02811027572534, 
96.38153836436156, 120.07616925248279, 84.22890865882204, 137.37293146704104, 
256.86523078979087, 96.27604743773242, 140.34555877635023, 130.9979858765228], 
"eval_len": [21, 33, 19, 23, 17, 27, 50, 19, 27, 25]}

 10%|▉         | 99998/1000000 [1:58:20<13:30:44, 18.50it/s]global step 100000, trans_decision ep_re 139.3949419292406

{"global_step": 100000, "eval_re": [135.6685303304031, 172.38255918070496, 
170.43053328226492, 84.33939314888055, 164.6069221304578, 96.06486847152613, 
145.07827104573997, 145.1316174622034, 114.15671444152727, 166.0900097986977], 
"eval_len": [26, 33, 34, 17, 31, 19, 28, 28, 22, 32]}

 11%|█         | 109998/1000000 [2:10:40<13:29:09, 18.33it/s]global step 110000, trans_decision ep_re 159.32966155401465

{"global_step": 110000, "eval_re": [84.43642633559897, 89.21321628110647, 
164.64330402241893, 169.60772112893622, 137.14460002793956, 315.73404144928634, 
211.73337316006413, 114.57755164563551, 178.0761092471973, 128.1302722419629], 
"eval_len": [17, 18, 31, 33, 27, 60, 40, 22, 35, 25]}

 12%|█▏        | 119998/1000000 [2:23:10<13:15:14, 18.44it/s]global step 120000, trans_decision ep_re 120.9535228247643

{"global_step": 120000, "eval_re": [158.09506125034665, 160.6708893546386, 
113.75121340120423, 107.92343052454252, 141.16629714000953, 132.23806118800454, 
90.33708420456206, 89.57039601484925, 112.93562134542698, 102.84717382405863], 
"eval_len": [31, 31, 22, 21, 27, 26, 18, 18, 22, 20]}

 13%|█▎        | 129998/1000000 [2:35:40<12:56:36, 18.67it/s]global step 130000, trans_decision ep_re 158.87641295979628

{"global_step": 130000, "eval_re": [130.15604166318852, 114.39916767730305, 
100.10366640637612, 96.72650448010302, 166.35536208400072, 135.4438392769789, 
441.48516142027205, 142.19417720477483, 119.08919666040686, 142.8110127245592], 
"eval_len": [25, 22, 20, 19, 32, 26, 87, 27, 23, 27]}

 14%|█▍        | 139998/1000000 [2:48:00<12:52:34, 18.55it/s]global step 140000, trans_decision ep_re 166.05290033025008

{"global_step": 140000, "eval_re": [125.25008075098535, 130.1037536813718, 
171.891339053748, 108.62323265407528, 95.1561305753504, 101.75842420687465, 
276.61544088853134, 132.55142955391597, 140.3810656230923, 378.1981063145555], 
"eval_len": [24, 25, 34, 21, 19, 20, 53, 26, 27, 76]}

 15%|█▍        | 149998/1000000 [3:00:30<12:49:46, 18.40it/s]global step 150000, trans_decision ep_re 137.98920883564932

{"global_step": 150000, "eval_re": [112.73070936536205, 232.50017241252934, 
179.05891696949809, 123.18157853861966, 125.23639353387954, 129.37701724754646, 
108.7736780117257, 118.25764753802176, 141.67125192483607, 109.10472281447451], 
"eval_len": [22, 44, 34, 24, 24, 25, 21, 23, 27, 21]}

 16%|█▌        | 159998/1000000 [3:12:50<12:38:51, 18.45it/s]global step 160000, trans_decision ep_re 176.05246871920104

{"global_step": 160000, "eval_re": [141.67251899768158, 149.556412166175, 
138.15377993753296, 124.07064762505345, 136.8461646210583, 141.29592544116122, 
473.4196065871606, 180.01863498617993, 111.97936847593454, 163.51162835407285], 
"eval_len": [27, 29, 27, 24, 28, 27, 88, 34, 22, 31]}

 17%|█▋        | 169998/1000000 [3:25:20<12:36:39, 18.28it/s]global step 170000, trans_decision ep_re 179.56103685213708

{"global_step": 170000, "eval_re": [106.1191004051491, 219.91600451225355, 
164.6457601174804, 139.03588461171242, 171.12899416397696, 108.30673345528294, 
366.9321130565137, 135.65597074394563, 216.48336235467482, 167.38644510038125], 
"eval_len": [21, 43, 31, 28, 33, 21, 69, 26, 41, 33]}

 18%|█▊        | 179998/1000000 [3:37:50<12:21:10, 18.44it/s]global step 180000, trans_decision ep_re 131.6789223445061

{"global_step": 180000, "eval_re": [113.47342142656225, 108.25984262823495, 
105.1195889839996, 167.97265596916964, 147.38317080125717, 140.69829468011616, 
135.5596663828088, 118.87113978519106, 151.90820979043798, 127.54323299728355], 
"eval_len": [23, 21, 21, 32, 28, 27, 26, 23, 29, 25]}

 19%|█▉        | 189998/1000000 [3:50:10<12:11:43, 18.45it/s]global step 190000, trans_decision ep_re 130.50838179764028

{"global_step": 190000, "eval_re": [138.27946519998196, 129.62993968365438, 
107.23966560218538, 126.46459636217135, 108.5339384930687, 144.62882207003008, 
118.98121898072311, 145.58132730840396, 119.76592459888613, 165.97891967729777],
"eval_len": [27, 25, 21, 25, 21, 28, 23, 29, 23, 32]}

 20%|█▉        | 199998/1000000 [4:02:40<12:03:13, 18.44it/s]global step 200000, trans_decision ep_re 128.49482304803058

{"global_step": 200000, "eval_re": [109.27970260338452, 179.74777941344536, 
95.94915437555954, 143.8496703976219, 170.44369474635081, 96.56602089407039, 
90.15043784617795, 111.98365858652969, 157.5639493025667, 129.41416231459877], 
"eval_len": [21, 34, 19, 28, 33, 19, 18, 22, 31, 25]}

 21%|██        | 209998/1000000 [4:15:10<11:54:54, 18.42it/s]global step 210000, trans_decision ep_re 141.8281682052586

{"global_step": 210000, "eval_re": [106.96709265546322, 102.16029587575976, 
107.4672185221027, 176.47983903073919, 118.97731970729077, 84.35574263126661, 
131.62394132826725, 337.3398399957408, 162.41514624325097, 90.49524606270491], 
"eval_len": [21, 20, 21, 33, 23, 17, 25, 65, 31, 18]}

 22%|██▏       | 219998/1000000 [4:27:30<11:49:39, 18.32it/s]global step 220000, trans_decision ep_re 140.65010011931432

{"global_step": 220000, "eval_re": [134.13072139478632, 250.94806001595862, 
122.00214584569636, 108.13977435878189, 150.0332917744118, 125.8426192379159, 
109.01478002685074, 131.95439613369962, 112.72889503671271, 161.70631736832934],
"eval_len": [26, 50, 24, 21, 29, 25, 21, 26, 22, 32]}

 23%|██▎       | 229998/1000000 [4:40:00<11:36:01, 18.44it/s]global step 230000, trans_decision ep_re 121.09069852924317

{"global_step": 230000, "eval_re": [153.37977134823288, 150.47287508489663, 
102.27800054161257, 90.55407172684772, 123.70304928939555, 117.32301595970104, 
103.03610665522898, 153.06981828998, 113.73380275081509, 103.35647364572138], 
"eval_len": [29, 30, 20, 18, 24, 23, 20, 29, 22, 20]}

 24%|██▍       | 239998/1000000 [4:52:30<11:30:08, 18.35it/s]global step 240000, trans_decision ep_re 158.3854926734633

{"global_step": 240000, "eval_re": [137.83835198181856, 142.4919205045794, 
167.74462230068454, 366.4020508946338, 90.14525611483424, 117.25704813215393, 
141.6635984117894, 166.11455657999926, 125.09112162291179, 129.1064001912279], 
"eval_len": [27, 27, 33, 68, 18, 23, 27, 32, 24, 25]}

 25%|██▍       | 249998/1000000 [5:04:50<11:21:12, 18.35it/s]global step 250000, trans_decision ep_re 135.24402771643454

{"global_step": 250000, "eval_re": [166.97738041782313, 111.4770583367529, 
101.4305486769745, 164.74137578710472, 102.18630514200346, 103.09478014144462, 
118.1355848817069, 117.46262595977498, 185.05401608736355, 181.88060173339647], 
"eval_len": [33, 22, 20, 32, 20, 20, 23, 23, 35, 37]}

 26%|██▌       | 259998/1000000 [5:17:20<11:06:48, 18.50it/s]global step 260000, trans_decision ep_re 119.71342017763668

{"global_step": 260000, "eval_re": [96.63176534022307, 89.23058092669979, 
101.70565200081609, 89.73148146453438, 189.2008101411735, 101.64438354898776, 
184.92157700972353, 90.17176853245198, 95.57696210677996, 158.31922070497666], 
"eval_len": [19, 18, 20, 18, 37, 20, 35, 18, 19, 31]}

 27%|██▋       | 269998/1000000 [5:29:50<11:02:16, 18.37it/s]global step 270000, trans_decision ep_re 151.00926857791222

{"global_step": 270000, "eval_re": [166.55993536298902, 102.2140406632981, 
143.42472068591232, 106.91082491622555, 310.6619753358352, 135.6272344884345, 
145.51727495389446, 118.49945286807535, 140.72513598761458, 139.95209051684304],
"eval_len": [32, 20, 28, 21, 61, 26, 28, 23, 27, 27]}

 28%|██▊       | 279998/1000000 [5:42:20<10:51:22, 18.42it/s]global step 280000, trans_decision ep_re 122.2142777027531

{"global_step": 280000, "eval_re": [90.17664111972164, 219.0513405322596, 
124.22510516170574, 84.33903063451827, 108.5957157187779, 146.74704708826374, 
108.5693431249777, 153.5686364066907, 96.48056719829388, 90.3893500423217], 
"eval_len": [18, 42, 24, 17, 21, 28, 21, 30, 19, 18]}

 29%|██▉       | 289998/1000000 [5:54:40<10:42:37, 18.41it/s]global step 290000, trans_decision ep_re 155.10684826534043

{"global_step": 290000, "eval_re": [90.11861037181083, 148.97226646075856, 
149.14850906207363, 119.03437764730994, 144.78964278145244, 117.10854268548147, 
117.42195374825022, 394.26163543507937, 145.7902865161586, 124.42265794502919], 
"eval_len": [18, 29, 30, 23, 28, 23, 23, 76, 28, 24]}

 30%|██▉       | 299998/1000000 [6:07:10<10:36:55, 18.32it/s]global step 300000, trans_decision ep_re 137.61087058346047

{"global_step": 300000, "eval_re": [128.17295276363905, 101.85093256025574, 
116.29346475066049, 199.4861080601982, 129.96827691341863, 103.16572205298156, 
189.80865318512852, 180.13189424552405, 119.58222636997849, 107.64847493281997],
"eval_len": [25, 20, 23, 39, 25, 20, 38, 37, 23, 21]}

 31%|███       | 309998/1000000 [6:19:40<10:18:33, 18.59it/s]global step 310000, trans_decision ep_re 136.03719351447333

{"global_step": 310000, "eval_re": [148.30366863611795, 95.89300694530053, 
129.37222591444254, 113.77467889935203, 102.58156933268856, 155.8115346750751, 
178.767663359726, 161.91767939758847, 95.67577951033572, 178.2741284741061], 
"eval_len": [29, 19, 25, 22, 20, 30, 35, 31, 19, 34]}

 32%|███▏      | 319998/1000000 [6:32:10<10:15:23, 18.42it/s]global step 320000, trans_decision ep_re 153.23027242074136

{"global_step": 320000, "eval_re": [135.75989544740662, 182.00031117275435, 
147.81663050886678, 172.6078887610974, 119.50318378865745, 273.65300395606863, 
134.47880295117298, 145.3167376457182, 96.62915525927572, 124.53711471639545], 
"eval_len": [26, 36, 28, 33, 23, 59, 26, 28, 19, 24]}

 33%|███▎      | 329998/1000000 [6:44:30<10:02:56, 18.52it/s]global step 330000, trans_decision ep_re 137.85643139858675

{"global_step": 330000, "eval_re": [159.4104986243645, 304.7427907723149, 
119.75734386233538, 95.914691123734, 101.17126991200809, 138.8606763959807, 
118.20355000390951, 125.67613560590921, 112.26714330622184, 102.56021437908926],
"eval_len": [31, 58, 23, 19, 20, 27, 23, 24, 22, 20]}

 34%|███▍      | 339998/1000000 [6:56:50<10:00:18, 18.32it/s]global step 340000, trans_decision ep_re 134.91973594427083

{"global_step": 340000, "eval_re": [118.62673383890045, 117.48357236646179, 
214.88047625025413, 102.2224727966657, 133.90829381626418, 162.5974825563507, 
130.02531536199533, 90.12129574063604, 155.81852224952624, 123.51319446565363], 
"eval_len": [23, 23, 42, 20, 26, 31, 27, 18, 30, 24]}

 35%|███▍      | 349998/1000000 [7:09:20<9:45:37, 18.50it/s]global step 350000, trans_decision ep_re 121.26916046635213

{"global_step": 350000, "eval_re": [146.85792846964097, 103.1124758383429, 
143.5113858121343, 94.33886950836481, 120.77425828147418, 101.51145660276708, 
102.72828424232775, 125.53476005008363, 107.26343557743249, 167.05875028095298],
"eval_len": [28, 20, 27, 19, 23, 20, 20, 24, 21, 33]}

 36%|███▌      | 359998/1000000 [7:21:40<9:26:16, 18.84it/s]global step 360000, trans_decision ep_re 171.6987671085088

{"global_step": 360000, "eval_re": [120.29530934947361, 132.664397642143, 
205.08035626624854, 329.69619484660234, 129.8339415893894, 89.81217584949886, 
159.23921218638324, 314.0762897390459, 101.57974000045816, 134.71005361584508], 
"eval_len": [23, 26, 39, 64, 25, 18, 31, 60, 20, 26]}

 37%|███▋      | 369998/1000000 [7:34:00<9:25:21, 18.57it/s]global step 370000, trans_decision ep_re 206.5613931464042

{"global_step": 370000, "eval_re": [133.40908880529764, 596.7303857242604, 
429.79186498639723, 106.93118967690611, 126.35313929185008, 95.30515458864443, 
117.14752273668184, 131.16487801268596, 147.11443266061585, 181.66627498070247],
"eval_len": [26, 126, 82, 21, 25, 19, 23, 26, 29, 36]}

 38%|███▊      | 379998/1000000 [7:46:20<9:16:08, 18.58it/s]global step 380000, trans_decision ep_re 140.27604600507726

{"global_step": 380000, "eval_re": [120.97503546967235, 155.96114140912968, 
159.12300443325978, 123.33751947199946, 128.4219163369291, 170.24554347187924, 
144.00939592380212, 139.24071880342268, 107.62504772699667, 153.8211370036817], 
"eval_len": [24, 30, 31, 24, 25, 33, 28, 27, 21, 29]}

 39%|███▉      | 389998/1000000 [7:58:50<9:07:22, 18.57it/s]global step 390000, trans_decision ep_re 155.92547952376663

{"global_step": 390000, "eval_re": [156.10024488967963, 110.0851463130722, 
140.65442426259608, 157.50043960003512, 96.51464688774006, 185.72760067704698, 
330.9980890273635, 114.26555492920133, 124.02906719371798, 143.3795814572133], 
"eval_len": [30, 21, 27, 31, 19, 36, 64, 22, 24, 28]}

 40%|███▉      | 399998/1000000 [8:11:10<9:03:08, 18.41it/s]global step 400000, trans_decision ep_re 132.0048056680398

{"global_step": 400000, "eval_re": [111.52665841668221, 137.55654309675376, 
172.31113441606823, 101.43542055049475, 129.20852443995014, 114.67277541368752, 
102.04148053023492, 162.5207352950119, 164.05400801776747, 124.72077650374726], 
"eval_len": [22, 27, 33, 20, 25, 22, 20, 32, 31, 24]}

 41%|████      | 409998/1000000 [8:23:40<8:46:27, 18.68it/s]global step 410000, trans_decision ep_re 144.42974702836875

{"global_step": 410000, "eval_re": [182.851734012541, 114.6009907456, 
133.70264265017894, 194.14353425383771, 96.62682469625352, 97.1354792075245, 
175.19577342900277, 246.96722597528452, 112.86182918897117, 90.21143612449322], 
"eval_len": [37, 22, 26, 38, 19, 19, 35, 47, 22, 18]}

 42%|████▏     | 419998/1000000 [8:36:00<8:45:57, 18.38it/s]global step 420000, trans_decision ep_re 188.03572135101453

{"global_step": 420000, "eval_re": [118.85438694226934, 163.56889885571306, 
146.72826137605773, 135.89328302356202, 472.013665683345, 90.36108543710743, 
420.67337134662216, 113.52883815624398, 102.03974127947734, 116.69568140974745],
"eval_len": [23, 31, 29, 26, 88, 18, 78, 23, 20, 23]}

 43%|████▎     | 429998/1000000 [8:48:20<8:32:19, 18.54it/s]global step 430000, trans_decision ep_re 135.4549894999679

{"global_step": 430000, "eval_re": [108.06303173247052, 96.76500516804958, 
194.02067309222494, 106.95817911857284, 172.42227687510768, 134.22554185069262, 
113.77401964867164, 180.87115716775563, 131.5377557524297, 115.91225459370396], 
"eval_len": [21, 19, 37, 21, 33, 26, 22, 35, 26, 23]}

 44%|████▍     | 439998/1000000 [9:00:40<8:26:12, 18.44it/s]global step 440000, trans_decision ep_re 113.19230303467046

{"global_step": 440000, "eval_re": [96.59878795294343, 108.05680423517546, 
173.36803537596455, 113.11391456034522, 96.64572106226973, 108.37954392824733, 
124.01757428276242, 108.70512002941534, 107.29987350929012, 95.73765541029088], 
"eval_len": [19, 21, 35, 22, 19, 21, 24, 21, 21, 19]}

 45%|████▍     | 449998/1000000 [9:13:10<8:11:28, 18.65it/s]global step 450000, trans_decision ep_re 140.47902667146678

{"global_step": 450000, "eval_re": [198.41298035465624, 103.11821817492846, 
173.59390549917887, 151.3702320003013, 95.783528026535, 108.62628761503117, 
139.19317191654372, 125.6393868956055, 167.4709218494715, 141.58163438241604], 
"eval_len": [38, 20, 33, 29, 19, 21, 28, 25, 32, 27]}

 46%|████▌     | 459998/1000000 [9:25:30<8:06:38, 18.49it/s]global step 460000, trans_decision ep_re 154.5232525514441

{"global_step": 460000, "eval_re": [191.42245052047355, 164.5942609970914, 
145.07025089361412, 114.58971263884672, 101.4992658230326, 102.79276626068173, 
196.5333761798774, 89.66508882119373, 343.5785122364063, 95.48684114322346], 
"eval_len": [37, 32, 28, 22, 20, 20, 41, 18, 65, 19]}

 47%|████▋     | 469998/1000000 [9:38:00<7:55:15, 18.59it/s]global step 470000, trans_decision ep_re 137.33373833351038

{"global_step": 470000, "eval_re": [122.91049682360564, 90.24776522243545, 
137.4706633504444, 96.58669586705133, 120.01978204983257, 100.7752116600721, 
146.54677101520133, 280.2791982175354, 156.52650865841474, 121.97429047051075], 
"eval_len": [24, 18, 27, 19, 23, 20, 28, 52, 30, 24]}

 48%|████▊     | 479998/1000000 [9:50:20<7:45:01, 18.64it/s]global step 480000, trans_decision ep_re 158.83084721234235

{"global_step": 480000, "eval_re": [118.3524668117865, 318.00592812057323, 
332.5564459094989, 95.8872419595047, 108.92915364169072, 134.21987379988536, 
95.83192400385916, 97.285508350827, 145.80777189573473, 141.4321576300631], 
"eval_len": [23, 59, 64, 19, 21, 26, 19, 19, 29, 27]}

 49%|████▉     | 489998/1000000 [10:02:40<7:40:39, 18.45it/s]global step 490000, trans_decision ep_re 155.09645029470246

{"global_step": 490000, "eval_re": [351.3809534919839, 95.24285603865275, 
139.75009344815436, 147.19637186640244, 130.4149226219896, 173.25567491865692, 
171.68255417782632, 114.20231576824989, 137.73940608108882, 90.09935453401951], 
"eval_len": [67, 19, 27, 29, 25, 34, 34, 22, 26, 18]}

 50%|████▉     | 499998/1000000 [10:15:00<7:32:14, 18.43it/s]global step 500000, trans_decision ep_re 181.6233817323056

{"global_step": 500000, "eval_re": [349.0702344782124, 191.25399943204812, 
109.54848253185152, 138.4476756705577, 213.3808370013304, 107.39743291482412, 
329.26062427456037, 108.03142899674596, 160.87877922334854, 108.96432279957692],
"eval_len": [69, 37, 21, 27, 42, 21, 65, 21, 31, 21]}

 51%|█████     | 509998/1000000 [10:27:30<7:14:18, 18.80it/s]global step 510000, trans_decision ep_re 174.31165444970912

{"global_step": 510000, "eval_re": [137.99553759687484, 134.99355116914748, 
90.25020075157137, 107.268865893042, 119.31736359685253, 179.94894983009712, 
140.2739831919385, 124.47966550119835, 567.2370540974687, 141.35137286890057], 
"eval_len": [27, 26, 18, 21, 23, 34, 27, 24, 113, 27]}

 52%|█████▏    | 519998/1000000 [10:39:50<7:08:56, 18.65it/s]global step 520000, trans_decision ep_re 192.3813796341492

{"global_step": 520000, "eval_re": [191.15694455167647, 96.26832284599647, 
495.24912338036023, 166.11705805334992, 96.27814289767602, 167.6440987056617, 
117.61617005898886, 244.38475147119948, 90.61681703513857, 258.4823673414442], 
"eval_len": [39, 19, 92, 32, 19, 32, 23, 48, 18, 51]}

 53%|█████▎    | 529998/1000000 [10:52:00<6:58:51, 18.70it/s]global step 530000, trans_decision ep_re 170.79872318582588

{"global_step": 530000, "eval_re": [129.37329814710432, 168.43017710829804, 
151.79678226337683, 330.51579012510575, 301.98670730731686, 202.3326143091242, 
107.62414824847956, 96.10033078672572, 90.03894569755036, 129.78843786517734], 
"eval_len": [25, 32, 29, 61, 59, 39, 21, 19, 18, 26]}

 54%|█████▍    | 539998/1000000 [11:04:20<6:50:17, 18.69it/s]global step 540000, trans_decision ep_re 159.448057604965

{"global_step": 540000, "eval_re": [120.05842696770442, 123.33991176971418, 
151.7187014938148, 101.46889173699876, 90.50222112140453, 124.96207545668456, 
108.64529276289517, 194.18164576467186, 453.9258552638579, 125.67755371190351], 
"eval_len": [24, 24, 29, 20, 18, 24, 21, 37, 92, 24]}

 55%|█████▍    | 549998/1000000 [11:16:40<6:37:53, 18.85it/s]global step 550000, trans_decision ep_re 126.90058736709754

{"global_step": 550000, "eval_re": [89.28048762723338, 95.8732316204406, 
105.99233704570028, 89.45855236834076, 128.39236917602253, 84.26727135970854, 
91.78269617404209, 101.78707408547982, 162.42575328407284, 319.74610092993464], 
"eval_len": [18, 19, 21, 18, 25, 17, 18, 20, 32, 61]}

 56%|█████▌    | 559998/1000000 [11:29:00<6:31:14, 18.74it/s]global step 560000, trans_decision ep_re 126.15102024581775

{"global_step": 560000, "eval_re": [129.80224705332594, 89.51853528188364, 
117.04397592038009, 102.61520766185966, 168.30709972017866, 101.85325091879503, 
204.81880541705502, 89.86066784925346, 112.4072895011612, 145.2831231342847], 
"eval_len": [25, 18, 23, 20, 33, 20, 39, 18, 22, 28]}

 57%|█████▋    | 569998/1000000 [11:41:10<6:23:49, 18.67it/s]global step 570000, trans_decision ep_re 190.79881739426145

{"global_step": 570000, "eval_re": [113.83337025002655, 217.42720584154767, 
451.98847889048454, 114.88999706593924, 164.24498425704346, 89.95051158547426, 
338.21182378968206, 108.99638355270368, 172.64321681599023, 135.8022018937229], 
"eval_len": [22, 42, 94, 22, 32, 18, 65, 21, 33, 26]}

 58%|█████▊    | 579998/1000000 [11:53:40<6:16:44, 18.58it/s]global step 580000, trans_decision ep_re 180.67623510991433

{"global_step": 580000, "eval_re": [169.04067089075488, 176.60214654565033, 
177.06616812096516, 107.37716862781853, 167.45259870964597, 107.22754394481154, 
135.45905153801996, 506.4841441715249, 151.884231677801, 108.16862687215092], 
"eval_len": [33, 34, 36, 21, 32, 21, 26, 101, 29, 21]}

 59%|█████▉    | 589998/1000000 [12:06:00<6:06:16, 18.66it/s]global step 590000, trans_decision ep_re 158.21858185665434

{"global_step": 590000, "eval_re": [170.82712206797873, 90.77762428717517, 
502.28827721415513, 115.94658140249776, 114.20468052388885, 90.21587791506414, 
109.25541267822965, 131.07937186553247, 96.1732523205302, 161.41761829149138], 
"eval_len": [33, 18, 102, 23, 22, 18, 21, 25, 19, 31]}

 60%|█████▉    | 599998/1000000 [12:18:20<5:57:02, 18.67it/s]global step 600000, trans_decision ep_re 133.45186137554128

{"global_step": 600000, "eval_re": [112.83057041247079, 167.17891032791647, 
141.46040456117052, 103.06227161421234, 96.86738357774996, 145.05106148231292, 
128.98473506733217, 148.55489124430974, 119.49386770769287, 171.03451776024474],
"eval_len": [22, 32, 27, 20, 19, 28, 25, 29, 23, 33]}

 61%|██████    | 609998/1000000 [12:30:40<5:44:37, 18.86it/s]global step 610000, trans_decision ep_re 141.7066043440107

{"global_step": 610000, "eval_re": [108.46968092967673, 166.84062166104079, 
102.25622102823901, 146.68302326943245, 89.97839872428808, 113.41407620989311, 
113.43332025297308, 108.19819401573332, 180.0284661157054, 287.76404123312506], 
"eval_len": [21, 32, 20, 28, 18, 22, 22, 21, 35, 56]}

 62%|██████▏   | 619998/1000000 [12:43:00<5:42:31, 18.49it/s]global step 620000, trans_decision ep_re 141.23587980197846

{"global_step": 620000, "eval_re": [153.30189077055346, 96.35562382881406, 
325.08291560215207, 177.2976958789728, 108.1507807598543, 124.9239761427188, 
101.68031670038167, 90.30495332243025, 132.93658788440223, 102.32405712950491], 
"eval_len": [29, 19, 61, 34, 21, 24, 20, 18, 27, 20]}

 63%|██████▎   | 629998/1000000 [12:55:20<5:30:31, 18.66it/s]global step 630000, trans_decision ep_re 128.80163656675268

{"global_step": 630000, "eval_re": [121.1052186462518, 113.6255986907139, 
116.67761433240278, 141.8590209070106, 200.6687617867388, 89.24799110199899, 
133.35192247264413, 150.30351747753065, 114.83038574039001, 106.34633451184494],
"eval_len": [24, 22, 23, 27, 38, 18, 26, 30, 22, 21]}

 64%|██████▍   | 639998/1000000 [13:07:40<5:20:05, 18.75it/s]global step 640000, trans_decision ep_re 150.8199790637507

{"global_step": 640000, "eval_re": [124.99533429470974, 118.84277153657624, 
108.92982890879999, 142.24755471032373, 189.47680734233884, 179.06832470359333, 
135.44091605857435, 96.11284778975461, 100.81019400499748, 312.2752112878386], 
"eval_len": [24, 23, 21, 28, 36, 35, 26, 19, 20, 57]}

 65%|██████▍   | 649998/1000000 [13:20:00<5:12:55, 18.64it/s]global step 650000, trans_decision ep_re 187.3776698972269

{"global_step": 650000, "eval_re": [258.306929221573, 89.93269870736216, 
108.19178072118979, 303.90629653399964, 101.74463712023919, 386.91180805287047, 
185.15587024295007, 114.83011878729306, 168.50469458595185, 156.29186499883966],
"eval_len": [53, 18, 21, 55, 20, 87, 35, 22, 33, 31]}

 66%|██████▌   | 659998/1000000 [13:32:20<5:04:05, 18.63it/s]global step 660000, trans_decision ep_re 166.46618796157952

{"global_step": 660000, "eval_re": [219.4064112824346, 152.70088667960826, 
124.64549124822436, 154.65479570646116, 101.91043021149777, 107.90082350440248, 
107.63152301337537, 187.82924727724256, 345.93341622257446, 162.04885446997397],
"eval_len": [45, 29, 24, 30, 20, 21, 21, 36, 64, 33]}

 67%|██████▋   | 669998/1000000 [13:44:40<4:55:59, 18.58it/s]global step 670000, trans_decision ep_re 140.90818465910982

{"global_step": 670000, "eval_re": [96.2735289391475, 125.40523917190374, 
157.4290785894519, 108.97571820905362, 161.44458307324214, 136.34347737105327, 
159.28031983690647, 181.3910217495111, 124.08740508317744, 158.45147456765116], 
"eval_len": [19, 24, 31, 21, 31, 26, 30, 36, 24, 31]}

 68%|██████▊   | 679998/1000000 [13:57:00<4:45:07, 18.71it/s]global step 680000, trans_decision ep_re 116.96748011014117

{"global_step": 680000, "eval_re": [165.14407750681718, 128.48660772299317, 
84.35258587099685, 178.4331491519678, 89.49805341129549, 106.54212394533117, 
135.30015783616233, 107.99699995910485, 84.27328996517362, 89.64775573156939], 
"eval_len": [31, 25, 17, 35, 18, 21, 27, 21, 17, 18]}

 69%|██████▉   | 689998/1000000 [14:09:21<4:38:01, 18.58it/s]global step 690000, trans_decision ep_re 150.47626264800368

{"global_step": 690000, "eval_re": [157.16067664496543, 128.65669276950933, 
96.62449717775073, 100.0789133603344, 102.03917549471987, 108.57399420095321, 
409.4546535380195, 102.55952230157168, 108.84465334117333, 190.76984765103927], 
"eval_len": [30, 25, 19, 20, 20, 21, 81, 20, 21, 37]}

 70%|██████▉   | 699998/1000000 [14:21:51<4:30:57, 18.45it/s]global step 700000, trans_decision ep_re 129.18081141175114

{"global_step": 700000, "eval_re": [149.40869462733644, 131.8802627194662, 
181.66283236584184, 139.03654600319138, 96.19052930903368, 107.9307355019342, 
96.6804221714214, 96.59737809921748, 156.58539890756242, 135.8353144125064], 
"eval_len": [29, 25, 35, 27, 19, 21, 19, 19, 30, 26]}

 71%|███████   | 709998/1000000 [14:34:11<4:19:13, 18.65it/s]global step 710000, trans_decision ep_re 166.9017583923766

{"global_step": 710000, "eval_re": [95.83556282115843, 111.95996762959712, 
255.88671421978196, 137.48181983042548, 166.6577801987428, 144.1741694814766, 
177.01894606860935, 111.85402275349361, 148.85525368074184, 319.2933472397385], 
"eval_len": [19, 22, 48, 28, 32, 28, 36, 22, 29, 64]}

 72%|███████▏  | 719998/1000000 [14:46:31<4:09:50, 18.68it/s]global step 720000, trans_decision ep_re 126.58417113644273

{"global_step": 720000, "eval_re": [128.14076036254448, 151.7244493706015, 
101.17690624829675, 123.9061798559625, 179.6426232127918, 126.66760167019359, 
118.50822197344058, 107.65849505018309, 138.9461195333285, 89.47035408708435], 
"eval_len": [25, 29, 20, 24, 35, 25, 23, 21, 27, 18]}

 73%|███████▎  | 729998/1000000 [14:58:41<3:59:21, 18.80it/s]global step 730000, trans_decision ep_re 129.60453639095877

{"global_step": 730000, "eval_re": [102.29350794673815, 155.0245304406407, 
156.09519673694268, 107.09050549134771, 112.91941525864463, 113.33639626199815, 
170.17424059028278, 136.85374962373726, 96.38491271164115, 145.87290884761464], 
"eval_len": [20, 30, 30, 21, 22, 22, 33, 26, 19, 28]}

 74%|███████▍  | 739998/1000000 [15:11:01<3:50:41, 18.78it/s]global step 740000, trans_decision ep_re 140.36848940904574

{"global_step": 740000, "eval_re": [108.26842843769303, 136.04437670638083, 
113.80217023762836, 134.50034643427549, 101.52709125866346, 124.76285023084317, 
107.97386921445339, 254.57322032436093, 152.09830373700078, 170.13423750915794],
"eval_len": [21, 26, 22, 26, 20, 24, 21, 51, 29, 33]}

 75%|███████▍  | 749998/1000000 [15:23:21<3:42:05, 18.76it/s]global step 750000, trans_decision ep_re 159.49714707037748

{"global_step": 750000, "eval_re": [158.68315020408838, 103.16832096837892, 
90.9347281864223, 373.52696689390234, 120.56535413142394, 321.90805088075547, 
124.76230184429556, 102.64743579541529, 109.06509603432337, 89.71006576476908], 
"eval_len": [31, 20, 18, 79, 24, 64, 24, 20, 21, 18]}

 76%|███████▌  | 759998/1000000 [15:35:31<3:34:39, 18.63it/s]global step 760000, trans_decision ep_re 133.7989144807279

{"global_step": 760000, "eval_re": [144.1207027689669, 126.08987211728099, 
102.52535332448232, 139.97503973325277, 96.01233154174844, 161.2633107242387, 
151.9181773560237, 133.4930668250419, 140.35577805716161, 142.23551235908178], 
"eval_len": [28, 24, 20, 28, 19, 31, 29, 28, 27, 27]}

 77%|███████▋  | 769998/1000000 [15:47:51<3:25:17, 18.67it/s]global step 770000, trans_decision ep_re 145.8740924065589

{"global_step": 770000, "eval_re": [164.02573643656658, 211.01281498816095, 
229.9045733413999, 103.2185549516031, 95.9670501104531, 140.0812845799694, 
118.19318299167408, 114.04683060333959, 144.67850760377783, 137.61238845864457],
"eval_len": [32, 41, 44, 20, 19, 27, 23, 22, 28, 28]}

 78%|███████▊  | 779998/1000000 [16:00:11<3:15:33, 18.75it/s]global step 780000, trans_decision ep_re 139.4811257804582

{"global_step": 780000, "eval_re": [158.2930746224051, 147.72093363611168, 
117.05707010389902, 150.29067894486624, 133.22610200964704, 117.89766378706526, 
103.01741536629616, 178.81867394668316, 131.9703662741252, 156.51927911348344], 
"eval_len": [30, 28, 23, 29, 26, 23, 20, 34, 26, 31]}

 79%|███████▉  | 789998/1000000 [16:12:21<3:05:13, 18.90it/s]global step 790000, trans_decision ep_re 133.61927174808827

{"global_step": 790000, "eval_re": [124.80465548944221, 124.1046642364404, 
178.75563642483945, 120.02693860695523, 119.19704857836187, 102.40894633656615, 
96.22310350401686, 138.42419977611178, 158.22395861228745, 174.0235659158614], 
"eval_len": [24, 24, 35, 23, 23, 20, 19, 27, 30, 34]}

 80%|███████▉  | 799998/1000000 [16:24:41<2:57:43, 18.76it/s]global step 800000, trans_decision ep_re 155.40337794067767

{"global_step": 800000, "eval_re": [140.16390805362525, 119.3883569417244, 
96.15878419179275, 166.9060654508546, 165.67783153060256, 118.45422633804002, 
162.5203020071115, 101.6198825014207, 363.80647247229626, 119.33794991930874], 
"eval_len": [27, 23, 19, 32, 33, 23, 32, 20, 69, 23]}

 81%|████████  | 809998/1000000 [16:36:51<2:47:09, 18.94it/s]global step 810000, trans_decision ep_re 128.82999213009694

{"global_step": 810000, "eval_re": [128.57045831841108, 141.34962509128394, 
96.55661743586367, 101.38986561367899, 172.1136123795392, 118.86623767286478, 
141.71718694352765, 153.2637318283244, 95.34099717105376, 139.131588846422], 
"eval_len": [25, 28, 19, 20, 33, 23, 27, 30, 19, 27]}

 82%|████████▏ | 819998/1000000 [16:49:11<2:40:43, 18.67it/s]global step 820000, trans_decision ep_re 159.49005356150087

{"global_step": 820000, "eval_re": [108.93399304556517, 113.6993557668949, 
209.69877040485937, 127.56832479526244, 142.73662659204783, 141.34782437469073, 
118.76192723783599, 142.0019221125109, 388.1067039247596, 102.04508736058187], 
"eval_len": [21, 22, 41, 25, 27, 28, 23, 28, 75, 20]}

 83%|████████▎ | 829998/1000000 [17:01:31<2:29:43, 18.92it/s]global step 830000, trans_decision ep_re 143.6504460499913

{"global_step": 830000, "eval_re": [96.76415293335143, 134.309669247952, 
160.90803997044932, 102.12992225763553, 144.5171561332832, 234.20068549667474, 
177.84296993349545, 89.31161280014018, 113.61455992275441, 182.9056918041767], 
"eval_len": [19, 26, 31, 20, 28, 44, 34, 18, 22, 35]}

 84%|████████▍ | 839998/1000000 [17:13:41<2:22:08, 18.76it/s]global step 840000, trans_decision ep_re 149.11843653994495

{"global_step": 840000, "eval_re": [161.38954037669413, 168.68062323096115, 
122.2512298392615, 108.58976357135097, 151.7995406317101, 95.96031133719059, 
96.08313122376838, 114.0016914796782, 101.98382597212375, 370.44470773671054], 
"eval_len": [31, 32, 24, 21, 30, 19, 19, 22, 20, 77]}

 85%|████████▍ | 849998/1000000 [17:26:01<2:12:38, 18.85it/s]global step 850000, trans_decision ep_re 132.9241600908352

{"global_step": 850000, "eval_re": [129.4183870595224, 118.0655246937981, 
164.93085282694207, 119.29359005517165, 186.81125024740444, 96.60287394943123, 
135.20517460365892, 110.77293164886848, 177.15106533911202, 90.98995048444247], 
"eval_len": [25, 23, 32, 23, 36, 19, 26, 22, 34, 18]}

 86%|████████▌ | 859998/1000000 [17:38:21<2:04:58, 18.67it/s]global step 860000, trans_decision ep_re 140.6842389422604

{"global_step": 860000, "eval_re": [116.05657362487716, 89.59172987245056, 
120.22739143074494, 84.36132481984795, 139.23468811919858, 130.52514812205206, 
311.5537534913032, 115.11412951390511, 163.18691467409954, 136.99073575412493], 
"eval_len": [23, 18, 23, 17, 27, 25, 61, 22, 31, 26]}

 87%|████████▋ | 869998/1000000 [17:50:41<1:56:06, 18.66it/s]global step 870000, trans_decision ep_re 144.55420231460164

{"global_step": 870000, "eval_re": [96.15624953000207, 90.90805307015134, 
392.44065005428115, 156.37824461657172, 102.83501265728256, 96.5307205956158, 
132.51721299673116, 113.76374618558137, 144.08833137856692, 119.92380206123252],
"eval_len": [19, 18, 84, 30, 20, 19, 26, 22, 28, 23]}

 88%|████████▊ | 879998/1000000 [18:02:51<1:46:24, 18.80it/s]global step 880000, trans_decision ep_re 142.74741891974355

{"global_step": 880000, "eval_re": [173.00312655184615, 176.12600633835115, 
136.58028778808884, 100.934905603838, 169.48051108722603, 139.79684369763484, 
153.52561104234252, 137.91362268391075, 126.10998621549314, 114.0032881887042], 
"eval_len": [34, 34, 27, 20, 33, 27, 30, 26, 24, 22]}

 89%|████████▉ | 889998/1000000 [18:15:11<1:37:14, 18.85it/s]global step 890000, trans_decision ep_re 126.92836920500015

{"global_step": 890000, "eval_re": [118.62941722386986, 107.53807144787828, 
130.93387502094572, 90.07363186048734, 120.01713496413748, 103.87050706002073, 
196.18364530979915, 112.36058673052982, 182.26154488538535, 107.41527754694775],
"eval_len": [23, 21, 25, 18, 23, 20, 38, 22, 36, 21]}

 90%|████████▉ | 899998/1000000 [18:27:31<1:29:26, 18.64it/s]global step 900000, trans_decision ep_re 130.0626823212739

{"global_step": 900000, "eval_re": [113.3125987748507, 91.14778044625268, 
124.044864989574, 136.5706647748261, 100.03051517893911, 274.04342077730973, 
96.06036907735725, 118.54458607350307, 151.57649589254314, 95.29552722758325], 
"eval_len": [22, 18, 24, 26, 20, 55, 19, 23, 29, 19]}

 91%|█████████ | 909998/1000000 [18:39:51<1:19:51, 18.78it/s]global step 910000, trans_decision ep_re 128.7005707942291

{"global_step": 910000, "eval_re": [95.71384167596898, 89.3343521521629, 
139.78458798328097, 101.31541301489888, 95.1136937080406, 101.99385748851283, 
115.81696409175412, 153.0191239545789, 281.48604914991216, 113.42782472318046], 
"eval_len": [19, 18, 27, 20, 19, 20, 23, 29, 56, 22]}

 92%|█████████▏| 919998/1000000 [18:52:01<1:11:14, 18.72it/s]global step 920000, trans_decision ep_re 158.07908752183516

{"global_step": 920000, "eval_re": [259.47477912053364, 372.892420302016, 
107.19096060668834, 89.44513531900904, 112.89582632678015, 123.1734848982573, 
177.59420205326524, 107.45550167074396, 129.66306808981506, 101.00549683124288],
"eval_len": [51, 74, 21, 18, 22, 24, 34, 21, 25, 20]}

 93%|█████████▎| 929998/1000000 [19:04:21<1:02:21, 18.71it/s]global step 930000, trans_decision ep_re 142.1729573572328

{"global_step": 930000, "eval_re": [126.07146615076103, 123.19941781434432, 
117.68376612495376, 96.93045077246782, 140.2130519675025, 113.63479389681332, 
171.9441905738063, 90.7325212453623, 254.36888942654397, 186.9510255997727], 
"eval_len": [24, 24, 23, 19, 27, 22, 33, 18, 50, 36]}

 94%|█████████▍| 939998/1000000 [19:16:41<53:42, 18.62it/s]global step 940000, trans_decision ep_re 153.34982720697934

{"global_step": 940000, "eval_re": [96.35971617803305, 119.75874868534702, 
90.22973129642291, 108.74262235392005, 354.897994536487, 113.28678428969788, 
119.34302209405617, 281.1362192412622, 107.72173512333504, 142.02169827123208], 
"eval_len": [19, 23, 18, 21, 67, 22, 23, 55, 21, 28]}

 95%|█████████▍| 949998/1000000 [19:29:01<44:33, 18.70it/s]global step 950000, trans_decision ep_re 155.42116071536614

{"global_step": 950000, "eval_re": [201.00836495878485, 117.85048210548686, 
108.16057932280107, 123.78072897504494, 179.88735632491438, 107.94845436818638, 
124.33927894309863, 342.22854324611075, 96.64135554107301, 152.3664633681605], 
"eval_len": [39, 23, 21, 24, 36, 21, 24, 69, 19, 29]}

 96%|█████████▌| 959998/1000000 [19:41:21<35:32, 18.76it/s]global step 960000, trans_decision ep_re 166.4437372996835

{"global_step": 960000, "eval_re": [127.23336393865337, 632.7595181213546, 
147.75838505817788, 113.23416523285798, 95.26364582949779, 113.95949963495877, 
112.80866937045045, 108.98466727353876, 122.76236314272893, 89.6730953946164], 
"eval_len": [25, 123, 29, 22, 19, 22, 22, 21, 24, 18]}

 97%|█████████▋| 969998/1000000 [19:53:41<26:54, 18.59it/s]global step 970000, trans_decision ep_re 171.16818610139177

{"global_step": 970000, "eval_re": [101.3251277409739, 107.14880514707815, 
162.310037775308, 114.56444309939997, 106.15557172721607, 188.95063620141113, 
255.72900175337094, 171.24304650415445, 346.32955418606, 157.92563687894497], 
"eval_len": [20, 21, 31, 22, 21, 37, 49, 34, 65, 31]}

 98%|█████████▊| 979998/1000000 [20:05:51<17:50, 18.68it/s]global step 980000, trans_decision ep_re 143.1198209476808

{"global_step": 980000, "eval_re": [161.58872336133751, 157.36398702221038, 
145.00442005885935, 100.52467414120173, 174.32267674544727, 119.07027753505616, 
96.51623927969986, 123.4994178891791, 198.26251940811983, 155.04527403569662], 
"eval_len": [31, 31, 28, 20, 33, 23, 19, 24, 38, 30]}

 99%|█████████▉| 989998/1000000 [20:18:11<08:55, 18.66it/s]global step 990000, trans_decision ep_re 156.18858682892144

{"global_step": 990000, "eval_re": [133.32949318625293, 119.14665009980916, 
96.55648251025417, 143.2420836765442, 130.4634435841345, 97.01647613261169, 
108.9232027451115, 488.0809547895738, 119.82699574734376, 125.30008581757878], 
"eval_len": [26, 23, 19, 29, 25, 19, 21, 94, 23, 24]}

100%|█████████▉| 999998/1000000 [20:30:31<00:00, 18.54it/s]global step 1000000, trans_decision ep_re 149.51314440447115

{"global_step": 1000000, "eval_re": [123.03507312637846, 148.33433837805327, 
106.87457324317855, 128.4606146512663, 109.79718231740179, 101.6178295563085, 
96.47313929172347, 365.09177847063853, 159.25097840703268, 156.1959366027301], 
"eval_len": [24, 29, 21, 25, 22, 20, 19, 67, 31, 30]}

100%|██████████| 1000000/1000000 [20:30:39<00:00, 13.54it/s]
