
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.25
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9998/1000000 [06:40<15:25:50, 17.82it/s]global step 10000, trans_decision ep_re 123.68609910653183

{"global_step": 10000, "eval_re": [123.85336369950804, 169.2600726119876, 
88.87873719298295, 120.06598658357767, 141.78467198487013, 118.94340464507995, 
96.77275075011315, 174.65504144219275, 101.67274734369794, 100.97421481130816], 
"eval_len": [24, 33, 18, 23, 28, 23, 19, 35, 20, 20]}

  2%|▏         | 19998/1000000 [19:20<15:00:49, 18.13it/s]global step 20000, trans_decision ep_re 149.15580884424926

{"global_step": 20000, "eval_re": [130.27027739255774, 121.96852645396535, 
140.76872176992384, 202.9380245769684, 95.63364664882212, 155.17265970773235, 
113.15750961809565, 116.2459097240855, 308.4512406133262, 106.9515719370153], 
"eval_len": [25, 24, 27, 40, 19, 30, 22, 23, 58, 21]}

  3%|▎         | 29998/1000000 [31:50<15:03:54, 17.89it/s]global step 30000, trans_decision ep_re 137.57774435183393

{"global_step": 30000, "eval_re": [132.58872453410086, 132.9649121594091, 
95.82517306940633, 100.42641401534226, 155.51144440724366, 114.36110225564676, 
191.07619099029841, 159.21347073174297, 131.12857657353692, 162.68143478161204],
"eval_len": [26, 26, 19, 20, 30, 23, 38, 31, 25, 32]}

  4%|▍         | 39998/1000000 [44:40<14:56:49, 17.84it/s]global step 40000, trans_decision ep_re 124.07904239477155

{"global_step": 40000, "eval_re": [113.30784699581575, 101.86117385770547, 
90.45437962283397, 180.4486919287204, 104.96448875234896, 118.26025302723453, 
157.4497846915648, 94.6571177854465, 100.1061918698714, 179.28049541617364], 
"eval_len": [22, 20, 18, 35, 21, 23, 31, 19, 20, 34]}

  5%|▍         | 49998/1000000 [57:20<14:35:16, 18.09it/s]global step 50000, trans_decision ep_re 178.90449672934975

{"global_step": 50000, "eval_re": [94.72252480428205, 122.62221712202488, 
106.04252147901614, 257.04543697705645, 106.75391688810431, 239.96893773581598, 
168.26484037344295, 161.23103909195555, 375.51756815700486, 156.87596466479403],
"eval_len": [19, 24, 21, 51, 21, 47, 33, 31, 69, 31]}

  6%|▌         | 59998/1000000 [1:10:00<14:37:06, 17.86it/s]global step 60000, trans_decision ep_re 118.57738519195198

{"global_step": 60000, "eval_re": [84.87925168216682, 174.0499640805827, 
128.01470546846983, 89.75665227502037, 120.6022892847454, 127.64144007588739, 
133.06338854462322, 142.02282193948272, 84.05478733152907, 101.68855123701229], 
"eval_len": [17, 33, 28, 18, 23, 25, 26, 27, 17, 20]}

  7%|▋         | 69998/1000000 [1:22:40<14:17:53, 18.07it/s]global step 70000, trans_decision ep_re 156.26813552328375

{"global_step": 70000, "eval_re": [83.96248245100045, 136.7584874996197, 
112.56766715232378, 148.56759785344036, 134.90389059188803, 145.9770991476535, 
189.93212241875275, 305.53022754459266, 174.4517937713641, 130.02998680220205], 
"eval_len": [17, 26, 22, 29, 27, 28, 36, 59, 36, 25]}

  8%|▊         | 79998/1000000 [1:35:20<13:54:12, 18.38it/s]global step 80000, trans_decision ep_re 137.8754400747318

{"global_step": 80000, "eval_re": [164.1239079048302, 111.82665106884559, 
144.939179759094, 143.85262277506445, 83.9121247531898, 107.39295677498731, 
112.16150235319337, 189.78878412674345, 230.83147949446058, 89.92519173690923], 
"eval_len": [33, 22, 29, 28, 17, 21, 22, 37, 46, 18]}

  9%|▉         | 89998/1000000 [1:47:50<14:10:58, 17.82it/s]global step 90000, trans_decision ep_re 108.09002353441159

{"global_step": 90000, "eval_re": [162.55511329220496, 96.7147229239677, 
84.02096432658345, 89.28818748105344, 130.5367008444843, 128.8672308307116, 
113.95955791769856, 96.01944269226982, 83.98750125119341, 94.95081378394842], 
"eval_len": [32, 19, 17, 18, 25, 25, 22, 19, 17, 19]}

 10%|▉         | 99998/1000000 [2:00:30<13:51:33, 18.04it/s]global step 100000, trans_decision ep_re 127.2609578290824

{"global_step": 100000, "eval_re": [125.72992078122874, 83.97996579404207, 
130.88271904367116, 94.36470941842506, 135.36517416974525, 112.43939860970602, 
129.1514712951434, 146.0334421131563, 134.59462887305446, 180.06814819265156], 
"eval_len": [24, 17, 25, 19, 26, 22, 25, 28, 26, 35]}

 11%|█         | 109998/1000000 [2:13:10<13:44:03, 18.00it/s]global step 110000, trans_decision ep_re 111.68973391484587

{"global_step": 110000, "eval_re": [95.90327593882714, 91.56945908771041, 
111.16174349577491, 161.74991155308507, 141.1789709449341, 94.9938931726217, 
89.42330850562209, 95.51049498275466, 111.51778215686873, 123.88849931026], 
"eval_len": [19, 18, 22, 31, 27, 19, 18, 19, 22, 24]}

 12%|█▏        | 119998/1000000 [2:25:40<13:26:31, 18.19it/s]global step 120000, trans_decision ep_re 120.95226273934979

{"global_step": 120000, "eval_re": [120.35396865197609, 153.37165387180494, 
89.48223918277803, 141.88120086228062, 95.66294297339174, 83.90533025942831, 
156.38598267119957, 111.26363619528695, 118.58303150748955, 138.6326412178621], 
"eval_len": [24, 30, 18, 27, 19, 17, 31, 22, 23, 27]}

 13%|█▎        | 129998/1000000 [2:38:20<13:24:30, 18.02it/s]global step 130000, trans_decision ep_re 129.85285913218905

{"global_step": 130000, "eval_re": [102.45253642575042, 89.85250745211404, 
192.2007112784002, 171.24261554092234, 159.5735799870478, 141.13565430535957, 
102.09356716738556, 100.64383036332119, 95.8249214013923, 143.508667400197], 
"eval_len": [20, 18, 39, 35, 31, 28, 20, 20, 19, 28]}

 14%|█▍        | 139998/1000000 [2:51:00<13:17:34, 17.97it/s]global step 140000, trans_decision ep_re 116.4576952149818

{"global_step": 140000, "eval_re": [108.52979197090033, 122.62772156727267, 
153.07971974890464, 89.93470886533994, 95.24922030826401, 180.5322976174808, 
115.93125284173836, 90.13499005340284, 101.76177361520067, 106.79547556131365], 
"eval_len": [21, 24, 29, 18, 19, 36, 23, 18, 20, 21]}

 15%|█▍        | 149998/1000000 [3:03:40<13:06:18, 18.02it/s]global step 150000, trans_decision ep_re 130.8958594507451

{"global_step": 150000, "eval_re": [102.99640326702654, 141.9629164911188, 
89.7823538348324, 199.08208273929648, 170.34477620926774, 101.67994479343916, 
138.21158382975528, 95.39316031890847, 139.1104147560858, 130.39495826771997], 
"eval_len": [20, 27, 18, 38, 33, 20, 29, 19, 27, 25]}

 16%|█▌        | 159998/1000000 [3:16:20<12:53:39, 18.10it/s]global step 160000, trans_decision ep_re 132.09983366227812

{"global_step": 160000, "eval_re": [102.14077924880574, 191.47362649057558, 
107.15300904974212, 148.3186057534815, 125.80957115119853, 142.43845792265617, 
83.87635765178784, 119.59240736846027, 157.26845291408372, 142.9270690719896], 
"eval_len": [20, 40, 21, 29, 25, 28, 17, 23, 32, 28]}

 17%|█▋        | 169998/1000000 [3:29:00<12:43:37, 18.12it/s]global step 170000, trans_decision ep_re 111.95912247685901

{"global_step": 170000, "eval_re": [106.24843762207269, 118.89176179232918, 
89.9239968668568, 102.25776720784782, 95.59937702387015, 106.59813760452597, 
95.58170039092235, 132.24970553829326, 152.56130095096245, 119.67903977090945], 
"eval_len": [21, 23, 18, 20, 19, 21, 19, 26, 29, 23]}

 18%|█▊        | 179998/1000000 [3:41:40<12:43:39, 17.90it/s]global step 180000, trans_decision ep_re 145.53864457382588

{"global_step": 180000, "eval_re": [124.7891599111192, 172.76276163816394, 
185.1468397205753, 107.64501116405647, 159.13089065039597, 168.2801962863314, 
193.4065821543915, 95.73069007513077, 158.32216282288084, 90.17215131521343], 
"eval_len": [24, 36, 36, 21, 30, 33, 37, 19, 31, 18]}

 19%|█▉        | 189998/1000000 [3:54:20<12:34:21, 17.90it/s]global step 190000, trans_decision ep_re 122.7655534953229

{"global_step": 190000, "eval_re": [101.32164566998753, 135.7778569618755, 
128.9008719579769, 102.68599655720381, 96.81986854874161, 95.97079699856828, 
134.9122651893882, 167.71343069965295, 102.47213407698526, 161.0806682928491], 
"eval_len": [20, 26, 25, 20, 19, 19, 26, 34, 20, 32]}

 20%|█▉        | 199998/1000000 [4:07:00<12:15:20, 18.13it/s]global step 200000, trans_decision ep_re 138.07333836794396

{"global_step": 200000, "eval_re": [100.59597439386077, 146.82163961007515, 
83.8557982070302, 115.69537995361553, 390.5748983374988, 99.97035508467692, 
118.71053651042351, 95.41494219288104, 140.27498672058644, 88.81887266879123], 
"eval_len": [20, 29, 17, 23, 76, 20, 23, 19, 27, 18]}

 21%|██        | 209998/1000000 [4:19:30<12:02:13, 18.23it/s]global step 210000, trans_decision ep_re 136.790144748601

{"global_step": 210000, "eval_re": [88.9663459582553, 89.1707928104029, 
111.96816364890948, 78.97202732447057, 167.5938094661902, 183.7639004829917, 
139.6305879234689, 263.2317957898199, 154.576630494438, 90.02739358706299], 
"eval_len": [18, 18, 23, 16, 32, 35, 27, 52, 30, 18]}

 22%|██▏       | 219998/1000000 [4:32:00<11:46:01, 18.41it/s]global step 220000, trans_decision ep_re 166.93296808171763

{"global_step": 220000, "eval_re": [197.37647298463978, 147.13805576485728, 
95.50763607125238, 113.49895894955995, 142.81995420495207, 155.10199542712243, 
153.29829045758876, 112.00736966892778, 127.98854727053664, 424.592400017739], 
"eval_len": [38, 28, 19, 22, 28, 30, 30, 22, 25, 81]}

 23%|██▎       | 229998/1000000 [4:44:40<11:45:52, 18.18it/s]global step 230000, trans_decision ep_re 140.884679646105

{"global_step": 230000, "eval_re": [427.9266686063432, 118.58730804903523, 
88.86048900203402, 95.10423506903813, 96.52992768137095, 102.75594299054387, 
114.2631279028427, 119.89308176590382, 154.74270780931585, 90.18330758462263], 
"eval_len": [78, 23, 18, 19, 19, 20, 22, 23, 32, 18]}

 24%|██▍       | 239998/1000000 [4:57:00<11:23:33, 18.53it/s]global step 240000, trans_decision ep_re 144.5151682385068

{"global_step": 240000, "eval_re": [143.45760983872844, 107.36613854843036, 
89.27454024575441, 118.65997483522553, 162.05868313627275, 100.89709157200701, 
102.22806213020775, 184.5027878764587, 90.76887509043088, 345.9379191115521], 
"eval_len": [28, 21, 18, 23, 31, 20, 20, 35, 18, 65]}

 25%|██▍       | 249998/1000000 [5:09:30<11:21:44, 18.34it/s]global step 250000, trans_decision ep_re 139.7825308673094

{"global_step": 250000, "eval_re": [135.69022296134955, 83.94501284471905, 
134.7228083361379, 199.97687004553217, 159.2272117439589, 184.19843375086683, 
105.28349120784873, 164.17898681042345, 134.82413026264678, 95.77814070961067], 
"eval_len": [27, 17, 26, 39, 31, 35, 21, 31, 26, 19]}

 26%|██▌       | 259998/1000000 [5:22:00<11:15:14, 18.26it/s]global step 260000, trans_decision ep_re 151.2174063628527

{"global_step": 260000, "eval_re": [177.21890745262877, 160.8132719945428, 
89.634784341756, 172.7125998357601, 95.62559599739207, 121.7552941856664, 
89.56767116598753, 139.47879069371905, 176.37806050725914, 288.9890874538151], 
"eval_len": [35, 32, 18, 35, 19, 24, 18, 28, 33, 57]}

 27%|██▋       | 269998/1000000 [5:34:30<11:03:49, 18.33it/s]global step 270000, trans_decision ep_re 131.04383536836696

{"global_step": 270000, "eval_re": [122.79711229704206, 141.20039151083625, 
88.92168431935896, 118.02727993159053, 154.59100660951992, 166.7429088869015, 
125.30165995299154, 118.19653297624949, 103.28104797051874, 171.3787292286604], 
"eval_len": [24, 29, 18, 23, 31, 32, 24, 23, 20, 33]}

 28%|██▊       | 279998/1000000 [5:47:00<10:50:45, 18.44it/s]global step 280000, trans_decision ep_re 130.82205702773734

{"global_step": 280000, "eval_re": [202.77689255090473, 176.09338407800152, 
89.43046033842359, 91.23156861334074, 112.80296202957723, 175.08902886770477, 
84.01639151446058, 129.32735897731976, 95.04248284572134, 152.41004046191938], 
"eval_len": [39, 34, 18, 18, 22, 34, 17, 25, 19, 30]}

 29%|██▉       | 289998/1000000 [5:59:30<10:44:42, 18.35it/s]global step 290000, trans_decision ep_re 169.0905858470882

{"global_step": 290000, "eval_re": [186.75442958847626, 145.05852697682207, 
95.28472493105534, 115.34071091607808, 171.85886574021754, 88.87077223383422, 
134.76530897222995, 121.9604801554368, 490.8327071121754, 140.1793318445562], 
"eval_len": [37, 28, 19, 23, 33, 18, 26, 24, 91, 28]}

 30%|██▉       | 299998/1000000 [6:12:00<10:33:15, 18.42it/s]global step 300000, trans_decision ep_re 123.58872404589633

{"global_step": 300000, "eval_re": [118.52269247333888, 187.99217568266823, 
100.96974818506739, 156.34980756281658, 126.1054408089971, 95.61801502850085, 
107.88457306228514, 151.36432417102216, 89.91259162385882, 101.16787186040823], 
"eval_len": [23, 36, 20, 30, 24, 19, 21, 30, 18, 20]}

 31%|███       | 309998/1000000 [6:24:30<10:29:32, 18.27it/s]global step 310000, trans_decision ep_re 136.96146733271186

{"global_step": 310000, "eval_re": [89.08298791542582, 95.46553223574459, 
151.167518333003, 113.65902674898987, 286.94175596679094, 174.8389377887178, 
118.54107110271319, 102.3843590071621, 131.69703012274434, 105.83645410582699], 
"eval_len": [18, 19, 29, 22, 55, 35, 23, 20, 25, 21]}

 32%|███▏      | 319998/1000000 [6:37:00<10:27:37, 18.06it/s]global step 320000, trans_decision ep_re 133.46032001335308

{"global_step": 320000, "eval_re": [192.88304358182637, 99.97736940593111, 
89.03936209741516, 84.06780967921507, 106.56896413737364, 96.74268115437009, 
245.61270060027607, 127.54787008946376, 119.68130354161724, 172.4820958460426], 
"eval_len": [38, 20, 18, 17, 21, 19, 46, 25, 24, 34]}

 33%|███▎      | 329998/1000000 [6:49:30<10:11:57, 18.25it/s]global step 330000, trans_decision ep_re 117.70351781595636

{"global_step": 330000, "eval_re": [123.13802540795275, 124.47633128930832, 
84.2172376439419, 90.24447668906492, 148.58054515656065, 111.9700596126085, 
96.28053496841119, 96.53981255784447, 119.47706324842821, 182.11109158544284], 
"eval_len": [24, 24, 17, 18, 29, 22, 19, 19, 23, 37]}

 34%|███▍      | 339998/1000000 [7:02:00<10:05:39, 18.16it/s]global step 340000, trans_decision ep_re 143.69028610799185

{"global_step": 340000, "eval_re": [84.14648802568071, 129.70288611173916, 
112.08717283142784, 116.3105296322379, 137.46934134129762, 244.97175144540265, 
253.73110851169642, 172.6893887938672, 89.53732617366809, 96.25686821290081], 
"eval_len": [17, 25, 22, 23, 27, 51, 50, 32, 18, 19]}

 35%|███▍      | 349998/1000000 [7:14:30<9:49:35, 18.37it/s]global step 350000, trans_decision ep_re 130.1004098828872

{"global_step": 350000, "eval_re": [113.47168470248705, 130.50830491027722, 
95.64767489705088, 141.40862316185473, 132.139679781135, 152.809774844682, 
139.04292580911377, 162.84730534311754, 90.47331610304288, 142.654809276111], 
"eval_len": [22, 25, 19, 28, 26, 30, 27, 32, 18, 27]}

 36%|███▌      | 359998/1000000 [7:27:00<9:37:23, 18.47it/s]global step 360000, trans_decision ep_re 128.05434512898347

{"global_step": 360000, "eval_re": [184.65622767364925, 145.26230488883007, 
137.95928202828404, 122.82716389709195, 89.48320275009542, 186.25867901219138, 
95.06979613015446, 95.38679583926978, 107.36949108247379, 116.27050798779496], 
"eval_len": [37, 29, 27, 25, 18, 36, 19, 19, 21, 23]}

 37%|███▋      | 369998/1000000 [7:39:30<9:27:11, 18.51it/s]global step 370000, trans_decision ep_re 119.37737288455949

{"global_step": 370000, "eval_re": [119.18752051929539, 137.7391073707531, 
89.80053586359274, 146.23936079315172, 118.79302533288586, 106.06703153853097, 
105.9086814689739, 89.30361593105911, 179.02071656774055, 101.71413345961159], 
"eval_len": [23, 27, 18, 28, 23, 21, 21, 18, 35, 20]}

 38%|███▊      | 379998/1000000 [7:52:00<9:19:00, 18.49it/s]global step 380000, trans_decision ep_re 128.4942486131339

{"global_step": 380000, "eval_re": [90.0661332573503, 135.5722408276905, 
123.1930007507292, 101.66449612434663, 103.43261152894101, 100.95741273790007, 
84.03121779843401, 107.03005810474093, 137.1213518451749, 301.8739631560314], 
"eval_len": [18, 30, 24, 20, 20, 20, 17, 21, 28, 56]}

 39%|███▉      | 389998/1000000 [8:04:30<9:13:26, 18.37it/s]global step 390000, trans_decision ep_re 142.6901048250858

{"global_step": 390000, "eval_re": [141.13163011643223, 147.52497808521244, 
149.48818666365307, 102.23471666111708, 144.8212123301003, 138.93563828753486, 
101.63206827154954, 117.73764773240292, 294.01297189312703, 89.38199820972852], 
"eval_len": [27, 30, 30, 20, 28, 27, 20, 23, 57, 18]}

 40%|███▉      | 399998/1000000 [8:17:00<9:11:11, 18.14it/s]global step 400000, trans_decision ep_re 131.83835860496168

{"global_step": 400000, "eval_re": [216.8435407371219, 133.57976666001818, 
138.14644976127533, 113.91205585202863, 137.5689835232792, 123.99140502698866, 
138.75806493457281, 95.35973262852312, 130.52326551651223, 89.70032140929672], 
"eval_len": [41, 26, 27, 22, 26, 24, 27, 19, 25, 18]}

 41%|████      | 409998/1000000 [8:29:20<8:53:27, 18.43it/s]global step 410000, trans_decision ep_re 116.12287732846433

{"global_step": 410000, "eval_re": [177.96158758186715, 127.92017761669346, 
113.11059200790406, 89.18252596810154, 125.95359545695, 95.4418279929497, 
101.08947922785201, 133.0494112983693, 100.72395096849672, 96.79562516545926], 
"eval_len": [35, 25, 22, 18, 25, 19, 20, 26, 20, 19]}

 42%|████▏     | 419998/1000000 [8:41:50<8:48:29, 18.29it/s]global step 420000, trans_decision ep_re 189.40559233929213

{"global_step": 420000, "eval_re": [173.87975011050509, 101.48163760039547, 
322.39541816114814, 155.421892265538, 150.1486038396296, 415.0900984445402, 
165.4034912337703, 148.82953975980087, 96.16825975511416, 165.2372322224795], 
"eval_len": [34, 20, 60, 30, 30, 80, 34, 29, 19, 32]}

 43%|████▎     | 429998/1000000 [8:54:20<8:42:42, 18.17it/s]global step 430000, trans_decision ep_re 139.95250287266367

{"global_step": 430000, "eval_re": [143.76287881896465, 110.66687151169754, 
173.54615925745625, 101.78386747783983, 165.25366518165137, 106.2947925996087, 
114.56780860016872, 132.55170447831333, 255.51710702082565, 95.58017378011077], 
"eval_len": [29, 22, 36, 20, 33, 21, 22, 26, 48, 19]}

 44%|████▍     | 439998/1000000 [9:06:50<8:27:55, 18.38it/s]global step 440000, trans_decision ep_re 129.03144096268275

{"global_step": 440000, "eval_re": [156.43153884892948, 117.35593339873218, 
137.0585412659824, 111.61579319563853, 95.01665939113771, 158.8510083761307, 
89.82575887030204, 186.12959472665867, 124.64894423087416, 113.38063732244167], 
"eval_len": [31, 23, 27, 22, 19, 31, 18, 36, 24, 22]}

 45%|████▍     | 449998/1000000 [9:19:20<8:14:49, 18.53it/s]global step 450000, trans_decision ep_re 129.0780613894679

{"global_step": 450000, "eval_re": [119.90058199114671, 116.73891148560196, 
139.95821476491906, 154.45569757675983, 131.87197485554424, 162.43022914134812, 
113.85264872672262, 116.81812942188563, 107.75073677020018, 127.00348916055104],
"eval_len": [23, 23, 27, 30, 25, 32, 22, 23, 21, 25]}

 46%|████▌     | 459998/1000000 [9:31:50<8:14:10, 18.21it/s]global step 460000, trans_decision ep_re 152.50968529382936

{"global_step": 460000, "eval_re": [132.49496043791476, 175.91736905468267, 
133.5010381752541, 132.9701989971558, 193.50036444656808, 83.91748269196246, 
162.2471443716134, 139.4902844013491, 274.80804201030594, 96.24996835148725], 
"eval_len": [28, 34, 26, 26, 37, 17, 31, 27, 54, 19]}

 47%|████▋     | 469998/1000000 [9:44:30<8:01:51, 18.33it/s]global step 470000, trans_decision ep_re 144.2061889106048

{"global_step": 470000, "eval_re": [115.41567562316011, 418.12278446016063, 
89.49371204347776, 111.63791968710338, 95.09794739542673, 125.7367328914615, 
95.9392284266347, 88.89997345092246, 122.84449273152168, 178.87342239617908], 
"eval_len": [23, 79, 18, 22, 19, 24, 19, 18, 24, 35]}

 48%|████▊     | 479998/1000000 [9:57:00<7:51:58, 18.36it/s]global step 480000, trans_decision ep_re 145.69414798427312

{"global_step": 480000, "eval_re": [89.32826383450814, 150.7168766827462, 
220.7599266939388, 95.00833253481065, 107.87673658303922, 138.30784655581272, 
336.76224642397005, 95.9857958476417, 102.34861395191491, 119.84684073434856], 
"eval_len": [18, 30, 42, 19, 21, 27, 59, 19, 20, 24]}

 49%|████▉     | 489998/1000000 [10:09:20<7:40:47, 18.45it/s]global step 490000, trans_decision ep_re 139.77627199626005

{"global_step": 490000, "eval_re": [96.31581364479214, 83.91846499773239, 
215.2607854172929, 129.62406171027303, 96.68116549681434, 150.2132991133642, 
120.28674155257355, 129.25246790379504, 163.3211829083632, 212.88873721759947], 
"eval_len": [19, 17, 43, 25, 19, 32, 23, 25, 32, 41]}

 50%|████▉     | 499998/1000000 [10:21:50<7:33:25, 18.38it/s]global step 500000, trans_decision ep_re 154.00224052559457

{"global_step": 500000, "eval_re": [117.3587844845364, 152.99530846512724, 
124.76242506784014, 114.98319036830335, 183.38124509468085, 120.77750102303382, 
191.41790862261223, 95.71770488902152, 102.53395358159324, 336.0943836591969], 
"eval_len": [23, 29, 24, 23, 35, 23, 39, 19, 20, 66]}

 51%|█████     | 509998/1000000 [10:34:20<7:23:44, 18.40it/s]global step 510000, trans_decision ep_re 136.36319185630208

{"global_step": 510000, "eval_re": [95.81612023573545, 136.84131255076022, 
263.0956599733435, 136.83297653906897, 163.49862329050234, 95.9662395774739, 
129.4620229849021, 111.59865586403603, 129.13770482225752, 101.38260272494074], 
"eval_len": [19, 27, 50, 27, 31, 19, 25, 22, 25, 20]}

 52%|█████▏    | 519998/1000000 [10:46:40<7:13:34, 18.45it/s]global step 520000, trans_decision ep_re 134.76671526129394

{"global_step": 520000, "eval_re": [170.22504716976124, 101.9804629476368, 
193.28360297080718, 130.8807360919833, 139.20314008738265, 117.94726235464029, 
130.03495129050148, 95.83414955840458, 172.62758405453818, 95.65021608728371], 
"eval_len": [33, 20, 38, 26, 31, 23, 25, 19, 33, 19]}

 53%|█████▎    | 529998/1000000 [10:59:10<7:07:13, 18.34it/s]global step 530000, trans_decision ep_re 125.064445221189

{"global_step": 530000, "eval_re": [167.85881512137288, 89.17552175030295, 
189.66255267266638, 102.58034068989004, 141.6583372270464, 84.17401142482964, 
101.1720229812718, 138.96886383491452, 100.40051678841326, 134.99346972118212], 
"eval_len": [33, 18, 37, 20, 27, 17, 20, 27, 20, 26]}

 54%|█████▍    | 539998/1000000 [11:11:30<6:57:49, 18.35it/s]global step 540000, trans_decision ep_re 106.44666424574157

{"global_step": 540000, "eval_re": [125.68572790777976, 95.4778442417504, 
120.0905219453621, 105.52075694355771, 90.18572152358722, 84.00295936922069, 
89.73462632518817, 107.06186823866918, 127.39480452494404, 119.31181143735658], 
"eval_len": [24, 19, 24, 21, 18, 17, 18, 21, 25, 23]}

 55%|█████▍    | 549998/1000000 [11:24:00<6:49:07, 18.33it/s]global step 550000, trans_decision ep_re 123.03539127203048

{"global_step": 550000, "eval_re": [129.1279007577799, 132.16137884243795, 
95.22621125308642, 126.30347087752376, 171.75783697672424, 89.56298458649674, 
128.19341431944693, 120.52790705604446, 153.53449394259079, 83.95831410817375], 
"eval_len": [25, 26, 19, 25, 34, 18, 25, 24, 30, 17]}

 56%|█████▌    | 559998/1000000 [11:36:20<6:35:46, 18.53it/s]global step 560000, trans_decision ep_re 194.34310630395748

{"global_step": 560000, "eval_re": [319.41732216842144, 100.19258075479688, 
90.63531606913556, 372.26597122207716, 140.97262464827497, 393.43053781390256, 
111.55235679600672, 113.28715780736422, 119.40099441320812, 182.276201346387], 
"eval_len": [62, 20, 18, 68, 28, 75, 22, 23, 24, 35]}

 57%|█████▋    | 569998/1000000 [11:48:50<6:30:43, 18.34it/s]global step 570000, trans_decision ep_re 145.07744186774397

{"global_step": 570000, "eval_re": [89.42957768528466, 135.60586031033637, 
149.2591952963888, 157.5612686326784, 130.47177676865905, 170.825427963337, 
125.69747281627981, 193.72492367668573, 155.61533461252702, 142.583580915263], 
"eval_len": [18, 27, 29, 32, 26, 34, 25, 38, 30, 28]}

 58%|█████▊    | 579998/1000000 [12:01:10<6:17:30, 18.54it/s]global step 580000, trans_decision ep_re 133.58584935181588

{"global_step": 580000, "eval_re": [95.8618450559801, 89.96548789003782, 
157.19818688896376, 95.38057139337405, 129.4190069809254, 150.30039992489117, 
161.64111077992018, 137.85889663112187, 153.78951817683833, 164.44346979610634],
"eval_len": [19, 18, 30, 19, 25, 29, 32, 26, 29, 34]}

 59%|█████▉    | 589998/1000000 [12:13:40<6:11:07, 18.41it/s]global step 590000, trans_decision ep_re 159.6904911256516

{"global_step": 590000, "eval_re": [112.29629879806818, 112.80280405618328, 
169.00033423229146, 148.37004304937886, 113.75819309106875, 106.68547754984992, 
154.0036887684936, 150.22731980467253, 108.07157436710399, 421.68917753940536], 
"eval_len": [22, 22, 32, 28, 22, 21, 30, 29, 21, 80]}

 60%|█████▉    | 599998/1000000 [12:26:10<6:01:24, 18.45it/s]global step 600000, trans_decision ep_re 141.4431018191015

{"global_step": 600000, "eval_re": [181.4801008954631, 107.89322115049538, 
128.75029927193248, 124.42369898636936, 123.7911765310673, 219.2844658556402, 
128.81045158803514, 89.23162240332535, 160.3258980251159, 150.44008348357082], 
"eval_len": [35, 21, 26, 24, 25, 43, 25, 18, 31, 30]}

 61%|██████    | 609998/1000000 [12:38:30<5:51:00, 18.52it/s]global step 610000, trans_decision ep_re 131.352773333621

{"global_step": 610000, "eval_re": [114.70871667788458, 162.35265586102565, 
143.23911448679357, 95.41469477544324, 95.51110407414578, 146.69877283318306, 
183.72165123175938, 84.0070871394955, 151.28970673035064, 136.58422952612875], 
"eval_len": [23, 31, 29, 19, 19, 29, 35, 17, 31, 28]}

 62%|██████▏   | 619998/1000000 [12:51:00<5:42:40, 18.48it/s]global step 620000, trans_decision ep_re 157.91290766809817

{"global_step": 620000, "eval_re": [181.22236798425794, 113.28765182391952, 
123.63367760447393, 95.05456232090913, 163.90638247194457, 89.01037404583886, 
457.5370002333965, 107.1181339293058, 118.43899539383774, 129.91993087309788], 
"eval_len": [35, 22, 24, 19, 32, 18, 89, 21, 23, 25]}

 63%|██████▎   | 629998/1000000 [13:03:20<5:34:45, 18.42it/s]global step 630000, trans_decision ep_re 117.19540017267602

{"global_step": 630000, "eval_re": [148.2448215388747, 89.0506694041617, 
102.40400811200351, 147.93401545574798, 88.94103575062007, 95.84572363783468, 
174.49129698319538, 121.2787204930985, 114.84177980444301, 88.9219305467807], 
"eval_len": [28, 18, 20, 29, 18, 19, 36, 24, 22, 18]}

 64%|██████▍   | 639998/1000000 [13:15:50<5:29:20, 18.22it/s]global step 640000, trans_decision ep_re 114.63241368143251

{"global_step": 640000, "eval_re": [148.78866674423773, 107.91005981745752, 
101.20752589068101, 134.36637785515828, 88.97687808423818, 90.67842789622223, 
84.25323406297538, 123.1423309782608, 113.10026820100376, 153.90036728409012], 
"eval_len": [30, 21, 20, 27, 18, 18, 17, 24, 22, 32]}

 65%|██████▍   | 649998/1000000 [13:28:20<5:17:04, 18.40it/s]global step 650000, trans_decision ep_re 135.1464880984224

{"global_step": 650000, "eval_re": [316.8143544481056, 113.12879855727348, 
113.79833713637132, 101.92834077460313, 163.73781185605267, 96.13189537520955, 
101.47377368614283, 158.18739382502997, 102.25209849711419, 84.01207682832116], 
"eval_len": [60, 22, 22, 20, 32, 19, 20, 30, 20, 17]}

 66%|██████▌   | 659998/1000000 [13:40:50<5:08:08, 18.39it/s]global step 660000, trans_decision ep_re 130.74518323591204

{"global_step": 660000, "eval_re": [119.41512637122923, 146.9835464716866, 
152.2906732784445, 140.38761125572518, 113.6982353666284, 152.39659016417693, 
88.84673300820943, 154.0200141248396, 95.21233322013373, 144.20096909804656], 
"eval_len": [23, 30, 30, 27, 22, 30, 18, 30, 19, 28]}

 67%|██████▋   | 669998/1000000 [13:53:10<4:59:06, 18.39it/s]global step 670000, trans_decision ep_re 120.91434928734748

{"global_step": 670000, "eval_re": [187.80369390155963, 152.73522915859235, 
102.29523025540364, 112.94340142375239, 102.22894448929392, 83.9749327717929, 
129.05024204107707, 152.2370124008316, 96.5517827872763, 89.32302364389491], 
"eval_len": [40, 29, 20, 22, 20, 17, 25, 29, 19, 18]}

 68%|██████▊   | 679998/1000000 [14:05:41<4:52:50, 18.21it/s]global step 680000, trans_decision ep_re 110.08223298465639

{"global_step": 680000, "eval_re": [162.66802777423436, 94.81870650347504, 
148.81566495806112, 88.8951388361156, 89.0350100388894, 117.12004778177922, 
96.12574955002623, 90.14244001481457, 117.97309204381617, 95.22845234535235], 
"eval_len": [31, 19, 29, 18, 18, 23, 19, 18, 23, 19]}

 69%|██████▉   | 689998/1000000 [14:18:11<4:40:59, 18.39it/s]global step 690000, trans_decision ep_re 133.3286554696257

{"global_step": 690000, "eval_re": [140.61196918859895, 275.47795655159945, 
97.1026682697948, 119.39327122597233, 134.02596980032814, 134.6552177369285, 
156.94985840394375, 106.09715477301289, 83.876357393448, 85.09613135263031], 
"eval_len": [28, 50, 19, 24, 26, 27, 31, 21, 17, 17]}

 70%|██████▉   | 699998/1000000 [14:30:31<4:31:51, 18.39it/s]global step 700000, trans_decision ep_re 181.5989386797924

{"global_step": 700000, "eval_re": [101.57940190702305, 94.94362509146814, 
368.6820626078122, 273.7439793898164, 122.4088554491747, 166.49258342329918, 
151.39603297934767, 222.1519795010868, 170.49551353650156, 144.09535291239456], 
"eval_len": [20, 19, 69, 59, 24, 32, 30, 46, 34, 28]}

 71%|███████   | 709998/1000000 [14:43:01<4:22:01, 18.45it/s]global step 710000, trans_decision ep_re 135.36271578535215

{"global_step": 710000, "eval_re": [176.3917454696057, 102.47216800703242, 
182.9173787105053, 108.37882512144088, 89.57791338146754, 132.73504085448548, 
95.48659832316562, 154.72137475739538, 180.55890845762136, 130.38720477080196], 
"eval_len": [33, 20, 36, 21, 18, 26, 19, 30, 36, 26]}

 72%|███████▏  | 719998/1000000 [14:55:21<4:13:38, 18.40it/s]global step 720000, trans_decision ep_re 120.04543917092606

{"global_step": 720000, "eval_re": [83.91020412057543, 107.48546602180046, 
112.73490771144327, 131.0975908958908, 110.94419531879011, 164.72537558150094, 
125.27507007887431, 167.3880778677465, 101.10374954717635, 95.78975456546225], 
"eval_len": [17, 21, 22, 27, 22, 33, 24, 32, 20, 19]}

 73%|███████▎  | 729998/1000000 [15:07:41<4:02:08, 18.58it/s]global step 730000, trans_decision ep_re 155.24471272742193

{"global_step": 730000, "eval_re": [318.11237596267307, 110.79713226090976, 
140.3957711140815, 83.92486644872508, 99.30871322915779, 112.01433480863423, 
184.9180127449715, 131.10609017816827, 209.93159879279344, 161.93823173410476], 
"eval_len": [60, 22, 27, 17, 20, 22, 36, 25, 39, 31]}

 74%|███████▍  | 739998/1000000 [15:20:11<3:55:17, 18.42it/s]global step 740000, trans_decision ep_re 119.57768495873651

{"global_step": 740000, "eval_re": [113.04885077459333, 132.01766495813388, 
173.53369849474106, 112.04061912674752, 101.85913933372709, 109.30990027599933, 
151.95102878281625, 96.19319102826033, 121.87645298769003, 83.94630382465623], 
"eval_len": [22, 25, 33, 22, 20, 21, 29, 19, 26, 17]}

 75%|███████▍  | 749998/1000000 [15:32:31<3:45:13, 18.50it/s]global step 750000, trans_decision ep_re 123.39998815060808

{"global_step": 750000, "eval_re": [99.69348188996786, 154.55186373246707, 
139.52276066736746, 89.47138541661364, 123.844738609423, 114.67940853778997, 
130.81888869456822, 161.6825532719534, 124.30469851605831, 95.43010216987184], 
"eval_len": [20, 32, 27, 18, 24, 23, 26, 31, 24, 19]}

 76%|███████▌  | 759998/1000000 [15:45:01<3:37:40, 18.38it/s]global step 760000, trans_decision ep_re 123.05969416346827

{"global_step": 760000, "eval_re": [130.41669820131202, 152.86126041597308, 
111.10740912864391, 148.59894531741503, 107.43484950514633, 102.40996031865254, 
116.20819846683634, 83.79915434583693, 163.62805998029813, 114.13240595456845], 
"eval_len": [25, 31, 22, 29, 21, 20, 24, 17, 33, 22]}

 77%|███████▋  | 769998/1000000 [15:57:31<3:27:20, 18.49it/s]global step 770000, trans_decision ep_re 149.94503294595694

{"global_step": 770000, "eval_re": [128.82390204960288, 203.9956657344409, 
108.38664948351837, 106.11278045163384, 315.3258920469491, 200.75962302951734, 
119.8054250944174, 83.97238265999066, 120.26191036962584, 112.00609853987288], 
"eval_len": [25, 40, 21, 21, 58, 39, 23, 17, 23, 22]}

 78%|███████▊  | 779998/1000000 [16:09:51<3:19:27, 18.38it/s]global step 780000, trans_decision ep_re 136.6520885128703

{"global_step": 780000, "eval_re": [120.01402931418869, 151.1709701786899, 
111.72980392924192, 125.67227094350477, 175.61795714966837, 183.47435032702805, 
128.7441852250648, 106.10993937726165, 102.19455500379392, 161.79282368026077], 
"eval_len": [23, 30, 22, 25, 36, 37, 25, 21, 20, 31]}

 79%|███████▉  | 789998/1000000 [16:22:21<3:08:57, 18.52it/s]global step 790000, trans_decision ep_re 120.86418763760511

{"global_step": 790000, "eval_re": [102.2812639266569, 181.13695275643113, 
122.46418737156306, 136.14254031655838, 117.79378700010916, 123.41400057612404, 
96.57532802198058, 130.24546340314123, 108.09624073085882, 90.49211227262775], 
"eval_len": [20, 35, 24, 28, 23, 24, 19, 25, 21, 18]}

 80%|███████▉  | 799998/1000000 [16:34:51<3:00:52, 18.43it/s]global step 800000, trans_decision ep_re 115.22729304525178

{"global_step": 800000, "eval_re": [123.98709890422109, 96.75831107045114, 
165.07764320210364, 84.02884314027881, 101.98402290633668, 88.68255451012529, 
161.474574653285, 126.64574612541118, 83.88235650920663, 119.75177943109827], 
"eval_len": [24, 19, 31, 17, 20, 18, 31, 24, 17, 23]}

 81%|████████  | 809998/1000000 [16:47:21<2:52:11, 18.39it/s]global step 810000, trans_decision ep_re 146.76647555371437

{"global_step": 810000, "eval_re": [195.9483010850063, 127.97276119976587, 
101.1276105438765, 112.948419692, 118.24177013432416, 123.54899299307304, 
173.10637372793013, 83.81205558480636, 128.51783160089323, 302.440638975468], 
"eval_len": [40, 25, 20, 22, 23, 24, 34, 17, 25, 61]}

 82%|████████▏ | 819998/1000000 [16:59:51<2:42:48, 18.43it/s]global step 820000, trans_decision ep_re 116.24349756554565

{"global_step": 820000, "eval_re": [88.7948183836042, 119.38698042702852, 
101.83991090770391, 96.21822997833762, 90.07395824128044, 163.71925756327525, 
101.32582904644329, 89.95149493231764, 203.39346663099403, 107.73102954447167], 
"eval_len": [18, 23, 20, 19, 18, 32, 20, 18, 41, 21]}

 83%|████████▎ | 829998/1000000 [17:12:21<2:33:41, 18.44it/s]global step 830000, trans_decision ep_re 120.52807322240228

{"global_step": 830000, "eval_re": [137.76189212329243, 90.23634112091693, 
128.9521351465319, 163.2748018850025, 78.96046995014952, 151.72223860576003, 
127.19354510613525, 118.16498189045055, 113.13518965276957, 95.87913674301413], 
"eval_len": [27, 18, 25, 32, 16, 32, 25, 23, 22, 19]}

 84%|████████▍ | 839998/1000000 [17:24:51<2:25:49, 18.29it/s]global step 840000, trans_decision ep_re 121.98788434295793

{"global_step": 840000, "eval_re": [168.8186077439362, 84.01326475960042, 
218.03552450609038, 89.09056268553229, 111.70130365568511, 89.86860940701166, 
131.01100170221324, 141.70295898199774, 89.98862603759622, 95.64838394991601], 
"eval_len": [33, 17, 41, 18, 22, 18, 25, 28, 18, 19]}

 85%|████████▍ | 849998/1000000 [17:37:21<2:16:33, 18.31it/s]global step 850000, trans_decision ep_re 139.95094032504784

{"global_step": 850000, "eval_re": [156.86253761387798, 143.0221065685811, 
157.93949348682762, 128.2087173043737, 133.83503317113383, 142.94848729394405, 
151.95763769192504, 126.0921726635673, 89.68940939796423, 168.9538080582837], 
"eval_len": [31, 28, 31, 25, 26, 27, 29, 24, 18, 34]}

 86%|████████▌ | 859998/1000000 [17:49:41<2:07:58, 18.23it/s]global step 860000, trans_decision ep_re 125.27460827384661

{"global_step": 860000, "eval_re": [95.23143769706357, 96.63071688625917, 
123.28901521120757, 89.04586182628013, 83.84453240332013, 125.76095437977925, 
101.17521506350718, 89.51197878672032, 124.1647682243297, 324.09160225999926], 
"eval_len": [19, 19, 24, 18, 17, 27, 20, 18, 24, 65]}

 87%|████████▋ | 869998/1000000 [18:02:11<1:59:45, 18.09it/s]global step 870000, trans_decision ep_re 125.18726569399291

{"global_step": 870000, "eval_re": [157.80978807136458, 169.9490085833845, 
182.95414829858177, 127.36061335743865, 89.49049555006621, 94.85455479845541, 
101.95405011540738, 107.29867964161815, 83.69574283315609, 136.5055756904561], 
"eval_len": [30, 34, 36, 25, 18, 19, 20, 21, 17, 26]}

 88%|████████▊ | 879998/1000000 [18:14:41<1:50:57, 18.03it/s]global step 880000, trans_decision ep_re 134.985329856991

{"global_step": 880000, "eval_re": [115.8397269655116, 95.59166035534663, 
113.84910579103695, 95.41685301297264, 287.8314543024196, 200.05595454721885, 
109.64399240459909, 89.05591225697839, 95.41730477143683, 147.1513341623896], 
"eval_len": [23, 19, 22, 19, 53, 39, 22, 18, 19, 29]}

 89%|████████▉ | 889998/1000000 [18:27:11<1:41:02, 18.14it/s]global step 890000, trans_decision ep_re 125.20921561704031

{"global_step": 890000, "eval_re": [101.966978352073, 146.43773791359652, 
118.50017852287964, 96.83417537394826, 125.01251752610969, 90.82204183720452, 
196.14659982694087, 161.69929289788092, 103.0944456355327, 111.57818828423673], 
"eval_len": [20, 29, 23, 19, 24, 18, 38, 31, 20, 22]}

 90%|████████▉ | 899998/1000000 [18:39:41<1:30:36, 18.39it/s]global step 900000, trans_decision ep_re 143.6117362698762

{"global_step": 900000, "eval_re": [155.67265038983604, 113.74430944052914, 
118.73536139738098, 120.20494112572352, 157.27969867198297, 185.30831724428612, 
225.4892955016895, 150.60406780944078, 125.17264623816928, 83.90607487972346], 
"eval_len": [31, 22, 23, 24, 30, 35, 45, 29, 25, 17]}

 91%|█████████ | 909998/1000000 [18:52:11<1:22:38, 18.15it/s]global step 910000, trans_decision ep_re 128.94083322139153

{"global_step": 910000, "eval_re": [151.1239551836872, 116.23711235730809, 
163.3606454102173, 135.98755561005694, 146.19629805697684, 83.83405532436706, 
153.9243604417776, 148.00939019617513, 101.72604940285994, 89.0089102304891], 
"eval_len": [31, 23, 33, 26, 28, 17, 29, 29, 20, 18]}

 92%|█████████▏| 919998/1000000 [19:04:51<1:13:43, 18.09it/s]global step 920000, trans_decision ep_re 117.0484696864108

{"global_step": 920000, "eval_re": [107.09405644624032, 89.72761685040444, 
118.76309390018484, 185.70506065638986, 124.61974202713517, 90.32778874508523, 
101.86139475985051, 136.2205058015714, 114.1339898020166, 102.03144787522947], 
"eval_len": [21, 18, 23, 37, 24, 18, 20, 26, 22, 20]}

 93%|█████████▎| 929998/1000000 [19:17:31<1:04:41, 18.03it/s]global step 930000, trans_decision ep_re 140.6013677131954

{"global_step": 930000, "eval_re": [102.25223883778422, 129.46497374080784, 
147.89785950286856, 89.2661603810815, 305.4541672213062, 94.63108279178765, 
101.95401867297878, 124.87603827517322, 153.56599971270816, 156.65113799545776],
"eval_len": [20, 25, 29, 18, 59, 19, 20, 24, 29, 31]}

 94%|█████████▍| 939998/1000000 [19:30:01<55:09, 18.13it/s]global step 940000, trans_decision ep_re 145.21701428716534

{"global_step": 940000, "eval_re": [84.00463863942593, 228.58145717795904, 
129.235226450815, 93.84009456927383, 315.97004832699736, 89.46138639906519, 
171.88634933792747, 100.40481480063833, 136.7291714522367, 102.05695571731432], 
"eval_len": [17, 44, 25, 19, 63, 18, 34, 20, 26, 20]}

 95%|█████████▍| 949998/1000000 [19:42:41<45:45, 18.22it/s]global step 950000, trans_decision ep_re 160.17728150257366

{"global_step": 950000, "eval_re": [84.06125202945987, 112.87935394032755, 
441.318480373205, 107.60951260452404, 145.16121452082086, 110.85702848806005, 
224.939076106286, 127.77018791868058, 151.1661478720201, 96.01056117235277], 
"eval_len": [17, 22, 87, 22, 29, 22, 43, 25, 30, 19]}

 96%|█████████▌| 959998/1000000 [19:55:21<36:47, 18.12it/s]global step 960000, trans_decision ep_re 167.65072606621396

{"global_step": 960000, "eval_re": [101.99989800632761, 119.55259408695775, 
122.49688459317757, 118.41028817542882, 95.9396850720581, 192.199135980066, 
207.8325044870168, 491.63229333484026, 109.36921160507772, 117.0747653211892], 
"eval_len": [20, 23, 24, 23, 19, 36, 39, 92, 21, 23]}

 97%|█████████▋| 969998/1000000 [20:08:01<27:41, 18.06it/s]global step 970000, trans_decision ep_re 151.09017069811074

{"global_step": 970000, "eval_re": [95.61561579562434, 95.18960080596598, 
189.15969051622463, 148.8648090291631, 111.45242240482018, 340.4848393519538, 
157.67026078278008, 113.03553348602362, 122.08418900491932, 137.34474580363243],
"eval_len": [19, 19, 36, 29, 22, 65, 32, 22, 24, 27]}

 98%|█████████▊| 979998/1000000 [20:20:41<18:42, 17.82it/s]global step 980000, trans_decision ep_re 114.57124480429415

{"global_step": 980000, "eval_re": [111.85425653724187, 139.0424216331246, 
96.02381120498181, 112.63693350759497, 89.44774051041777, 139.2111132732465, 
157.70935370548955, 95.5470561170207, 113.65032247733483, 90.58943907648893], 
"eval_len": [22, 27, 19, 22, 18, 28, 33, 19, 22, 18]}

 99%|█████████▉| 989998/1000000 [20:33:21<09:16, 17.98it/s]global step 990000, trans_decision ep_re 154.2691226540742

{"global_step": 990000, "eval_re": [180.9284839148298, 102.2395214877993, 
101.77075806129345, 139.45547107216348, 111.89635083145359, 150.1736182926224, 
368.8977235412647, 125.35425695442167, 100.04241967494893, 161.93262270994484], 
"eval_len": [35, 20, 20, 27, 22, 30, 71, 25, 20, 32]}

100%|█████████▉| 999998/1000000 [20:45:51<00:00, 18.12it/s]global step 1000000, trans_decision ep_re 117.75897333453236

{"global_step": 1000000, "eval_re": [127.91297780486396, 131.74724610262842, 
83.99401892670302, 142.99663545818947, 95.22614140696164, 111.62208168233279, 
119.94127829047196, 165.94538567403285, 83.93323697058321, 114.27073102855616], 
"eval_len": [25, 26, 17, 28, 19, 22, 24, 34, 17, 23]}

100%|██████████| 1000000/1000000 [20:45:57<00:00, 13.38it/s]
