
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'ExtremeSparseL4U32::markov(4, 32, [[249, 1], [1, 31]])'
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [03:50<8:48:49, 31.20it/s]global step 10000, trans_decision ep_re 199.0037136852432

{"global_step": 10000, "eval_re": [157.24523485455757, 162.96359610807121, 
141.56365849229138, 369.2608165466413, 156.16693797296884, 156.60315765855978, 
189.1441827667329, 167.37283481153574, 140.70971088143347, 349.0070067596397], 
"eval_len": [30, 31, 27, 68, 30, 30, 36, 32, 27, 66]}

  2%|▏         | 19999/1000000 [11:40<8:40:14, 31.40it/s]global step 20000, trans_decision ep_re 157.3150584863436

{"global_step": 20000, "eval_re": [136.11714328204047, 187.47597393770158, 
178.3483399094151, 141.669955425945, 130.88367686284846, 156.03173152889767, 
162.90472025495984, 151.65576430775636, 160.91240310795206, 167.1508762459193], 
"eval_len": [26, 36, 34, 27, 25, 30, 31, 29, 31, 32]}

  3%|▎         | 29999/1000000 [18:53<8:33:41, 31.47it/s]global step 30000, trans_decision ep_re 170.5579600078999

{"global_step": 30000, "eval_re": [187.87162189310953, 185.14808091563492, 
145.6771706177967, 181.80943637098872, 177.31272518508882, 157.21456705609245, 
156.69235966397318, 172.99709478443572, 167.98030907239965, 172.87623451947925],
"eval_len": [36, 36, 28, 35, 34, 30, 30, 33, 32, 33]}

  4%|▍         | 39997/1000000 [26:30<8:27:48, 31.51it/s]global step 40000, trans_decision ep_re 177.53831876038686

{"global_step": 40000, "eval_re": [151.6099722929245, 125.44270182621277, 
125.26063588045321, 177.5320856524599, 322.1749659917905, 167.68291238374186, 
167.4501601305432, 167.0024060158986, 177.6553028229944, 193.57204460684991], 
"eval_len": [29, 24, 24, 34, 63, 32, 32, 32, 34, 37]}

  5%|▍         | 49999/1000000 [33:44<8:23:13, 31.46it/s]global step 50000, trans_decision ep_re 180.1653141331235

{"global_step": 50000, "eval_re": [181.95925003690428, 130.61878863155871, 
198.03483866376416, 166.42939375833822, 177.55484634653564, 328.20839480792637, 
172.97587367661004, 152.2876952592346, 131.324815869656, 162.25924428070678], 
"eval_len": [35, 25, 38, 32, 34, 64, 33, 29, 25, 31]}

  6%|▌         | 59999/1000000 [41:21<8:42:44, 29.97it/s]global step 60000, trans_decision ep_re 155.48223937371512

{"global_step": 60000, "eval_re": [146.51406613249327, 167.32179412975162, 
162.02535886109118, 156.4478736823719, 136.39803410700495, 141.34075324226004, 
176.31255128631082, 171.2090868102619, 135.91434340455416, 161.33853208105162], 
"eval_len": [28, 32, 31, 30, 26, 27, 34, 33, 26, 31]}

  7%|▋         | 69999/1000000 [48:33<8:11:50, 31.51it/s]global step 70000, trans_decision ep_re 157.0278052929963

{"global_step": 70000, "eval_re": [151.9202358412592, 182.20839038843476, 
146.36809933216207, 160.97219702401128, 136.05435470554397, 162.0591046707181, 
176.35767800451967, 151.8881296016862, 155.88526009802663, 146.56460326360093], 
"eval_len": [29, 35, 28, 31, 26, 31, 34, 29, 30, 28]}

  8%|▊         | 79999/1000000 [55:55<8:15:13, 30.96it/s]global step 80000, trans_decision ep_re 202.0349875939059

{"global_step": 80000, "eval_re": [193.308843099392, 152.28888509234335, 
156.47582157230562, 146.57946117882688, 369.1195900772025, 156.8986326712584, 
266.4836436748911, 197.894377603513, 182.53862661845025, 198.76199435087577], 
"eval_len": [37, 29, 30, 28, 69, 30, 51, 38, 35, 38]}

  9%|▉         | 89999/1000000 [1:03:31<8:02:48, 31.41it/s]global step 90000, trans_decision ep_re 163.13780921112956

{"global_step": 90000, "eval_re": [176.07605409158836, 173.05065642144294, 
150.8230012613489, 176.95305464378328, 166.7360053814016, 177.0290718681204, 
171.48371632089658, 136.12285218725364, 151.6811636497007, 151.42251628575923], 
"eval_len": [34, 33, 29, 34, 32, 34, 33, 26, 29, 29]}

 10%|▉         | 99999/1000000 [1:10:42<11:57:02, 20.92it/s]global step 100000, trans_decision ep_re 176.65951694173486

{"global_step": 100000, "eval_re": [150.91796710096622, 188.9577638435035, 
171.61667540940417, 237.28886988658365, 141.32913706033247, 188.3864716879472, 
182.7031075858911, 176.23809946664772, 156.7512531266303, 172.40582424944233], 
"eval_len": [29, 36, 33, 46, 27, 36, 35, 34, 30, 33]}

 11%|█         | 109999/1000000 [1:18:21<11:17:33, 21.89it/s]global step 110000, trans_decision ep_re 200.56060521082566

{"global_step": 110000, "eval_re": [171.9475464861156, 467.41121152720973, 
172.21019331087504, 124.71123209988131, 176.46841165662812, 161.84861903177242, 
146.6064189163849, 152.46032929092422, 224.59942779460988, 207.34266199385524], 
"eval_len": [33, 85, 33, 24, 34, 31, 28, 29, 43, 40]}

 12%|█▏        | 119999/1000000 [1:25:32<7:48:27, 31.31it/s]global step 120000, trans_decision ep_re 194.1977737040435

{"global_step": 120000, "eval_re": [141.18464445650787, 161.64369630264966, 
181.5797589040499, 177.4278436412016, 219.27782242500223, 365.3635340584986, 
172.92144929956333, 172.92436661448164, 183.40591859421693, 166.2487027442631], 
"eval_len": [27, 31, 35, 34, 42, 69, 33, 33, 35, 32]}

 13%|█▎        | 129997/1000000 [1:32:53<7:37:00, 31.73it/s]global step 130000, trans_decision ep_re 204.0030935254546

{"global_step": 130000, "eval_re": [166.49460041038276, 454.0645591823175, 
171.79059836023947, 177.67113770302797, 183.3032816770074, 186.89951106323923, 
194.73510944475964, 172.74586011392336, 135.54152822490553, 196.784749074743], 
"eval_len": [32, 84, 33, 34, 35, 36, 37, 33, 26, 38]}

 14%|█▍        | 139999/1000000 [1:40:14<7:31:32, 31.74it/s]global step 140000, trans_decision ep_re 172.36207168886745

{"global_step": 140000, "eval_re": [193.7651048313745, 146.92094383705737, 
202.8973680574578, 230.26459859623898, 174.9165500567212, 162.48027185943533, 
141.21188450823118, 146.9172337627707, 146.28584823278229, 177.9609131466053], 
"eval_len": [37, 28, 39, 44, 34, 31, 27, 28, 28, 34]}

 15%|█▍        | 149999/1000000 [1:47:32<7:26:55, 31.70it/s]global step 150000, trans_decision ep_re 210.9347384356648

{"global_step": 150000, "eval_re": [187.6311473415678, 152.1197317018162, 
278.716823581985, 162.21178940475102, 125.29249854505284, 192.48283015166385, 
204.82067438439367, 150.29750085693146, 362.0489003768359, 293.7254880116506], 
"eval_len": [36, 29, 53, 31, 24, 37, 39, 29, 67, 56]}

 16%|█▌        | 159997/1000000 [1:54:56<7:21:35, 31.70it/s]global step 160000, trans_decision ep_re 200.58027853057064

{"global_step": 160000, "eval_re": [176.4575778558908, 161.03429560196912, 
172.78605538770375, 477.48039735676684, 177.23588354410518, 192.54822501760793, 
168.1207704943903, 203.1107463050685, 130.7878028313063, 146.24103091089742], 
"eval_len": [34, 31, 33, 87, 34, 37, 32, 39, 25, 28]}

 17%|█▋        | 169996/1000000 [2:02:15<7:11:07, 32.09it/s]global step 170000, trans_decision ep_re 187.0371478367823

{"global_step": 170000, "eval_re": [146.74520645453637, 204.80279304162758, 
161.76789317419735, 156.70294938663224, 176.29174360356598, 187.88384797174075, 
157.13519815273352, 172.51579510910207, 140.90485209843513, 365.62119937525205],
"eval_len": [28, 39, 31, 30, 34, 36, 30, 33, 27, 69]}

 18%|█▊        | 179999/1000000 [2:09:36<7:12:37, 31.59it/s]global step 180000, trans_decision ep_re 196.90710620709257

{"global_step": 180000, "eval_re": [146.5835879949649, 411.38478005642014, 
156.1173942539539, 220.99941877964386, 177.12121563966116, 198.71316724487104, 
167.1186495182416, 166.30690021794177, 183.60918063553243, 141.11676772969489], 
"eval_len": [28, 77, 30, 42, 34, 38, 32, 32, 35, 27]}

 19%|█▉        | 189999/1000000 [2:17:12<7:05:27, 31.73it/s]global step 190000, trans_decision ep_re 162.45481437253173

{"global_step": 190000, "eval_re": [193.60512984454743, 150.7145083724362, 
151.90441643366722, 193.46655063210326, 136.4349659547381, 192.87965179069485, 
150.63536334812207, 156.8389363832998, 136.29987179332605, 161.7687491723823], 
"eval_len": [37, 29, 29, 37, 26, 37, 29, 30, 26, 31]}

 20%|█▉        | 199999/1000000 [2:24:33<7:03:25, 31.49it/s]global step 200000, trans_decision ep_re 196.21539802247452

{"global_step": 200000, "eval_re": [140.76884788372132, 140.85159176256107, 
203.20056329062328, 177.70070710514955, 172.52337830967713, 202.18657131656596, 
222.1797813777444, 202.51597861275326, 264.06512057120847, 236.16143999474065], 
"eval_len": [27, 27, 39, 34, 33, 39, 43, 39, 50, 45]}

 21%|██        | 209999/1000000 [2:31:53<6:54:46, 31.74it/s]global step 210000, trans_decision ep_re 182.71131746519262

{"global_step": 210000, "eval_re": [219.48010264479095, 157.4763877484294, 
152.38160812430573, 151.17322626174712, 146.0540112387825, 135.66002478377465, 
124.49597440525878, 141.2346305062817, 152.0391902073196, 447.11801873123585], 
"eval_len": [42, 30, 29, 29, 28, 26, 24, 27, 29, 81]}

 22%|██▏       | 219999/1000000 [2:39:08<6:48:20, 31.84it/s]global step 220000, trans_decision ep_re 168.25164633452013

{"global_step": 220000, "eval_re": [151.6919132760635, 177.20537033708263, 
186.82070454704046, 161.7021036959598, 141.41507247330102, 141.55899442544936, 
209.95555938463517, 161.8500593659012, 208.74158388024392, 141.575101959524], 
"eval_len": [29, 34, 36, 31, 27, 27, 40, 31, 40, 27]}

 23%|██▎       | 229996/1000000 [2:46:26<6:43:06, 31.84it/s]global step 230000, trans_decision ep_re 191.33451764438573

{"global_step": 230000, "eval_re": [162.18346505755616, 167.0081541505492, 
173.11765845321557, 141.78684237590986, 407.09661473496175, 194.29901487177554, 
146.39296741748979, 197.07921746815643, 166.89895445516964, 157.4822874590735], 
"eval_len": [31, 32, 33, 27, 75, 37, 28, 38, 32, 30]}

 24%|██▍       | 239999/1000000 [2:53:47<6:41:25, 31.55it/s]global step 240000, trans_decision ep_re 227.30921571797407

{"global_step": 240000, "eval_re": [183.52731270153134, 182.33117327262244, 
417.57034681591, 136.02989502976163, 188.86215534233932, 188.88498757579885, 
166.6008369326436, 146.81988107598937, 161.69042707216607, 500.7751413609781], 
"eval_len": [35, 35, 76, 26, 36, 36, 32, 28, 31, 90]}

 25%|██▍       | 249999/1000000 [3:01:07<6:33:07, 31.80it/s]global step 250000, trans_decision ep_re 214.89054141140068

{"global_step": 250000, "eval_re": [141.41633967150355, 162.08761556092549, 
141.2910215127127, 470.70821878446884, 187.37074349517366, 171.51825086304024, 
146.3697686087852, 408.94046029385964, 151.9960459089263, 167.20694941461105], 
"eval_len": [27, 31, 27, 84, 36, 33, 28, 76, 29, 32]}

 26%|██▌       | 259999/1000000 [3:08:43<6:28:30, 31.75it/s]global step 260000, trans_decision ep_re 174.33222327291264

{"global_step": 260000, "eval_re": [130.63494588076165, 157.251144543623, 
178.83032986769973, 140.7927563759753, 171.90338014694035, 163.41021220374665, 
205.44115447288647, 239.9125589305262, 177.42816588796072, 177.7175844190062], 
"eval_len": [25, 30, 34, 27, 33, 31, 39, 46, 34, 34]}

 27%|██▋       | 269998/1000000 [3:16:03<6:23:48, 31.70it/s]global step 270000, trans_decision ep_re 190.15841839677609

{"global_step": 270000, "eval_re": [151.46198736911327, 214.37723835331917, 
207.78258387809018, 314.5074609334245, 124.82362728436784, 242.26115860828244, 
140.76882743172916, 141.17510879162407, 222.88165308825748, 141.5445382295526], 
"eval_len": [29, 41, 40, 60, 24, 46, 27, 27, 43, 27]}

 28%|██▊       | 279999/1000000 [3:23:23<6:20:04, 31.57it/s]global step 280000, trans_decision ep_re 195.06071763738413

{"global_step": 280000, "eval_re": [156.8610682607223, 209.017156395224, 
146.10873223522663, 135.96320642044913, 171.7349926432742, 183.03014111618364, 
177.16662411908382, 398.24023634290427, 210.17527586486332, 162.30974297590998],
"eval_len": [30, 40, 28, 26, 33, 35, 34, 74, 40, 31]}

 29%|██▉       | 289999/1000000 [3:30:35<6:14:24, 31.61it/s]global step 290000, trans_decision ep_re 168.89260344531107

{"global_step": 290000, "eval_re": [183.17768771177336, 187.4368956169757, 
130.47610325191673, 141.6547185179561, 156.6789775276704, 146.85847480535, 
225.82739430167624, 176.8646042428102, 178.06866509581172, 161.8825133811704], 
"eval_len": [35, 36, 25, 27, 30, 28, 43, 34, 34, 31]}

 30%|██▉       | 299999/1000000 [3:37:54<6:03:20, 32.11it/s]global step 300000, trans_decision ep_re 178.03256473759075

{"global_step": 300000, "eval_re": [162.10355969210764, 191.97821672183798, 
171.99261975197825, 275.695961684865, 151.9965849046957, 135.52803244104814, 
182.8960862033729, 151.7746497222408, 167.26132733856525, 189.09860891519583], 
"eval_len": [31, 37, 33, 52, 29, 26, 35, 29, 32, 36]}

 31%|███       | 309999/1000000 [3:45:14<6:05:24, 31.47it/s]global step 310000, trans_decision ep_re 184.3366683560492

{"global_step": 310000, "eval_re": [177.07073173192825, 198.3843211398171, 
119.93072845951656, 166.79492984873454, 146.41492574057662, 193.31917499890233, 
136.0510617680878, 177.16241308575354, 381.606434698261, 146.6319620889142], 
"eval_len": [34, 38, 23, 32, 28, 37, 26, 34, 72, 28]}

 32%|███▏      | 319999/1000000 [3:52:33<5:55:58, 31.84it/s]global step 320000, trans_decision ep_re 218.94226766325255

{"global_step": 320000, "eval_re": [434.2099212089095, 177.26702718870453, 
161.82283637242398, 273.93083893241607, 305.61356212375074, 161.04336018459125, 
186.5777558250267, 171.702808285105, 135.3608420965019, 181.8937244150957], 
"eval_len": [79, 34, 31, 52, 57, 31, 36, 33, 26, 35]}

 33%|███▎      | 329999/1000000 [4:00:03<5:50:40, 31.84it/s]global step 330000, trans_decision ep_re 156.4389076468877

{"global_step": 330000, "eval_re": [145.96708370175003, 141.2998600210696, 
167.53523626289603, 156.9223399915758, 183.29596984936006, 172.39341593423717, 
157.10163048134388, 130.737626260551, 162.37888994451995, 146.75702402157364], 
"eval_len": [28, 27, 32, 30, 35, 33, 30, 25, 31, 28]}

 34%|███▍      | 339999/1000000 [4:07:23<5:45:07, 31.87it/s]global step 340000, trans_decision ep_re 220.99419639322574

{"global_step": 340000, "eval_re": [176.98278667348177, 414.29467725242955, 
162.10221867793615, 203.73930860462892, 147.18021514981982, 140.9423073111086, 
173.05500860285068, 182.16067310839705, 187.3512523430841, 422.1335162085206], 
"eval_len": [34, 77, 31, 39, 28, 27, 33, 35, 36, 78]}

 35%|███▍      | 349999/1000000 [4:14:43<5:41:30, 31.72it/s]global step 350000, trans_decision ep_re 175.81139831125424

{"global_step": 350000, "eval_re": [172.31194759409703, 187.3772168254053, 
182.53061270784707, 193.97958917462407, 162.17855676857087, 237.17476863164973, 
162.29635816171844, 151.2763583691739, 162.70066035897005, 146.287914520486], 
"eval_len": [33, 36, 35, 37, 31, 45, 31, 29, 31, 28]}

 36%|███▌      | 359999/1000000 [4:21:50<5:33:49, 31.95it/s]global step 360000, trans_decision ep_re 200.70546605562538

{"global_step": 360000, "eval_re": [177.69992184833458, 147.0405420841816, 
213.75933262175985, 156.5724921432528, 141.27476394450989, 373.7372609196025, 
151.4324175780925, 337.9406066844229, 135.68921297693933, 171.90810975515782], 
"eval_len": [34, 28, 41, 30, 27, 70, 29, 63, 26, 33]}

 37%|███▋      | 369999/1000000 [4:29:33<5:33:04, 31.52it/s]global step 370000, trans_decision ep_re 204.24118470991635

{"global_step": 370000, "eval_re": [174.1827473342927, 391.37603930160986, 
151.63080832091532, 242.22348059856174, 156.16886347613408, 212.93666213590396, 
208.31667615609777, 171.6588923303144, 171.84265840840987, 162.07501903692346], 
"eval_len": [34, 74, 29, 46, 30, 41, 40, 33, 33, 31]}

 38%|███▊      | 379999/1000000 [4:36:45<5:26:55, 31.61it/s]global step 380000, trans_decision ep_re 210.95832122935923

{"global_step": 380000, "eval_re": [161.98771529926506, 151.52895950562328, 
173.22335549185541, 189.1829994201742, 135.94171709893894, 635.6247133955022, 
156.00433810303693, 167.58110123868752, 197.83905456138197, 140.6692581791268], 
"eval_len": [31, 29, 33, 36, 26, 116, 30, 32, 38, 27]}

 39%|███▉      | 389999/1000000 [4:44:06<5:22:23, 31.53it/s]global step 390000, trans_decision ep_re 177.30130889603188

{"global_step": 390000, "eval_re": [232.06033219442045, 197.96492677498816, 
157.11879016077245, 151.66359575158162, 136.0980113521744, 203.19656387907665, 
155.54340603099655, 194.68310668647626, 156.72293336586335, 187.96142276396895],
"eval_len": [44, 38, 30, 29, 26, 39, 30, 37, 30, 36]}

 40%|███▉      | 399999/1000000 [4:51:34<5:13:32, 31.89it/s]global step 400000, trans_decision ep_re 164.20283671778338

{"global_step": 400000, "eval_re": [130.85756899848388, 135.4977517745397, 
147.21927019123484, 213.55239644916693, 141.60658983158802, 187.51035409472283, 
183.01764228766748, 162.36071927126656, 183.62268693977526, 156.7833873393881], 
"eval_len": [25, 26, 28, 41, 27, 36, 35, 31, 35, 30]}

 41%|████      | 409999/1000000 [4:58:54<5:11:27, 31.57it/s]global step 410000, trans_decision ep_re 172.39773975939633

{"global_step": 410000, "eval_re": [177.48589012132257, 161.87605390232653, 
209.57293425331738, 176.48117086148775, 141.84916639540006, 151.81722149822858, 
136.3552015521282, 219.84322201669193, 156.48834913264815, 192.20818786041212], 
"eval_len": [34, 31, 40, 34, 27, 29, 26, 42, 30, 37]}

 42%|████▏     | 419999/1000000 [5:06:14<5:40:15, 28.41it/s]global step 420000, trans_decision ep_re 182.3357576503574

{"global_step": 420000, "eval_re": [187.41272323561506, 172.1639162728224, 
150.78216226416492, 136.21795627807066, 176.0225251570575, 275.35216188505905, 
183.34694963590056, 156.91437295435975, 243.87289057261023, 141.27191824791407],
"eval_len": [36, 33, 29, 26, 34, 52, 35, 30, 47, 27]}

 43%|████▎     | 429999/1000000 [5:13:19<5:00:31, 31.61it/s]global step 430000, trans_decision ep_re 232.3988830117606

{"global_step": 430000, "eval_re": [207.41257711501356, 146.374555485642, 
176.4562949552492, 156.5218955329496, 140.93542093444495, 348.93152197088057, 
388.89336719576795, 398.0526975042703, 167.01676996798568, 193.39372945540248], 
"eval_len": [40, 28, 34, 30, 27, 66, 72, 74, 32, 37]}

 44%|████▍     | 439999/1000000 [5:20:38<4:53:23, 31.81it/s]global step 440000, trans_decision ep_re 232.8879249564956

{"global_step": 440000, "eval_re": [145.4963139219633, 151.844662582913, 
451.154058628411, 161.91405850067272, 161.79214386473157, 146.74212680642603, 
171.87227021965637, 360.1511046604335, 380.13097528435594, 197.78153509539248], 
"eval_len": [28, 29, 80, 31, 31, 28, 33, 67, 71, 38]}

 45%|████▍     | 449999/1000000 [5:27:56<4:52:48, 31.31it/s]global step 450000, trans_decision ep_re 154.72338153623113

{"global_step": 450000, "eval_re": [141.14541090303965, 166.2079948129, 
172.46215242564816, 151.6117175568678, 146.66195956535373, 141.43841348341056, 
157.2355561483082, 145.94076713465918, 146.46388141176655, 178.0659619203575], 
"eval_len": [27, 32, 33, 29, 28, 27, 30, 28, 28, 34]}

 46%|████▌     | 459999/1000000 [5:35:16<4:44:39, 31.62it/s]global step 460000, trans_decision ep_re 162.90964152561747

{"global_step": 460000, "eval_re": [157.0880143973908, 177.51041864398712, 
162.322489603721, 167.27755056272252, 140.97758737544996, 141.12560568126705, 
197.9792775778586, 145.88520861374258, 167.7048767307063, 171.22538606932906], 
"eval_len": [30, 34, 31, 32, 27, 27, 38, 28, 32, 33]}

 47%|████▋     | 469999/1000000 [5:42:44<4:40:37, 31.48it/s]global step 470000, trans_decision ep_re 206.5539632643488

{"global_step": 470000, "eval_re": [394.7434984408991, 364.0314729787278, 
168.07174219604198, 140.8309368704423, 177.81467065904044, 135.5401741625181, 
196.7108597689152, 188.5565059958663, 168.40854451521946, 130.83122705581712], 
"eval_len": [74, 69, 32, 27, 34, 26, 38, 36, 32, 25]}

 48%|████▊     | 479999/1000000 [5:50:04<4:31:40, 31.90it/s]global step 480000, trans_decision ep_re 181.86140546704422

{"global_step": 480000, "eval_re": [162.93932231488552, 177.02197804845724, 
156.81272028146898, 151.47725405439053, 178.10253511230664, 357.6318284086325, 
141.9919007846527, 168.86142109992662, 177.58196045612982, 146.193134109592], 
"eval_len": [31, 34, 30, 29, 34, 65, 27, 32, 34, 28]}

 49%|████▉     | 489999/1000000 [5:57:08<4:27:06, 31.82it/s]global step 490000, trans_decision ep_re 200.99733325560373

{"global_step": 490000, "eval_re": [162.05772163788535, 166.7321875100261, 
418.6453448363639, 166.57905413774006, 176.76801650082967, 167.44271815353326, 
233.92046626701537, 162.6880593467303, 193.58661893182563, 161.55314523408785], 
"eval_len": [31, 32, 78, 32, 34, 32, 45, 31, 37, 31]}

 50%|████▉     | 499997/1000000 [6:04:25<4:21:36, 31.85it/s]global step 500000, trans_decision ep_re 166.70952352297112

{"global_step": 500000, "eval_re": [162.10976708387227, 191.4004968721957, 
146.6983233092976, 162.69509958060246, 161.6518740272674, 167.38612789799777, 
151.52832182061383, 141.1398705450114, 209.5537359969623, 172.93161809589063], 
"eval_len": [31, 37, 28, 31, 31, 32, 29, 27, 40, 33]}

 51%|█████     | 509997/1000000 [6:11:54<4:16:34, 31.83it/s]global step 510000, trans_decision ep_re 213.95926318831388

{"global_step": 510000, "eval_re": [146.14670489136049, 269.0377001910133, 
182.76880993171213, 162.2157826866985, 197.63349585544637, 190.2620788533377, 
172.85511381217924, 440.8015769438105, 205.28409585908568, 172.58727285849474], 
"eval_len": [28, 51, 35, 31, 38, 36, 33, 81, 39, 33]}

 52%|█████▏    | 519999/1000000 [6:19:00<4:11:21, 31.83it/s]global step 520000, trans_decision ep_re 224.53885506717802

{"global_step": 520000, "eval_re": [182.01466015940775, 172.46299617424037, 
423.2168011376069, 146.38054536158504, 152.71388532069832, 406.4697318147144, 
181.87683899966612, 220.3073648529561, 172.53010673953955, 187.4156201113654], 
"eval_len": [35, 33, 79, 28, 29, 75, 35, 42, 33, 36]}

 53%|█████▎    | 529999/1000000 [6:26:18<4:05:37, 31.89it/s]global step 530000, trans_decision ep_re 190.69125137794555

{"global_step": 530000, "eval_re": [141.1558747301216, 201.45593296834917, 
189.63436102472372, 173.3789947296618, 152.6544634974108, 146.83217025812127, 
362.25238928977535, 162.33482048528873, 177.6000148825305, 199.6134919134726], 
"eval_len": [27, 39, 36, 33, 29, 28, 68, 31, 34, 38]}

 54%|█████▍    | 539999/1000000 [6:33:35<4:02:17, 31.64it/s]global step 540000, trans_decision ep_re 173.52296345754158

{"global_step": 540000, "eval_re": [188.74003400090686, 167.25435719723782, 
250.6980231272448, 177.98827277334522, 203.15759183184758, 162.11549864005946, 
130.8079264956898, 135.94653989144643, 141.14956265143246, 177.3718279662054], 
"eval_len": [36, 32, 48, 34, 39, 31, 25, 26, 27, 34]}

 55%|█████▍    | 549999/1000000 [6:41:04<3:55:08, 31.90it/s]global step 550000, trans_decision ep_re 186.96306940821904

{"global_step": 550000, "eval_re": [167.53417795208532, 172.46956689715466, 
167.29627373483643, 162.4088645150116, 168.33931992063455, 177.41624365508687, 
357.73229654642597, 167.1421791006481, 135.82935786389257, 193.4624138964143], 
"eval_len": [32, 33, 32, 31, 32, 34, 68, 32, 26, 37]}

 56%|█████▌    | 559999/1000000 [6:48:09<4:02:52, 30.19it/s]global step 560000, trans_decision ep_re 175.87869028555014

{"global_step": 560000, "eval_re": [161.68260807505493, 177.85381612721704, 
156.79910861350564, 151.96417989099635, 182.57656599760816, 322.32382839467215, 
176.68489373910222, 131.17759780718401, 156.27765749143447, 141.4466467187263], 
"eval_len": [31, 34, 30, 29, 35, 61, 34, 25, 30, 27]}

 57%|█████▋    | 569999/1000000 [6:55:26<3:46:12, 31.68it/s]global step 570000, trans_decision ep_re 193.30303156012354

{"global_step": 570000, "eval_re": [151.2710924530091, 182.1108556284135, 
161.44010919430644, 389.3611925873237, 196.96867061574213, 166.54629556891717, 
209.49837767050008, 172.51909654806758, 166.96559066736464, 136.34903466759116],
"eval_len": [29, 35, 31, 72, 38, 32, 40, 33, 32, 26]}

 58%|█████▊    | 579996/1000000 [7:02:46<3:43:28, 31.32it/s]global step 580000, trans_decision ep_re 155.67451271486203

{"global_step": 580000, "eval_re": [125.80196075853937, 208.8059121812251, 
162.600926663888, 141.15163880681726, 181.22491969153396, 135.87405639974463, 
135.67374387147518, 155.92199527898214, 147.08594951482337, 162.6040239815911], 
"eval_len": [24, 40, 31, 27, 35, 26, 26, 30, 28, 31]}

 59%|█████▉    | 589996/1000000 [7:10:05<3:36:11, 31.61it/s]global step 590000, trans_decision ep_re 209.28700031466096

{"global_step": 590000, "eval_re": [350.0021775590849, 146.78556413047255, 
172.89587358716804, 235.1726299558414, 197.3702716392128, 125.16298428623149, 
178.7466045839974, 172.2985414614894, 168.45337947911162, 345.98197646400024], 
"eval_len": [65, 28, 33, 45, 38, 24, 34, 33, 32, 65]}

 60%|█████▉    | 599999/1000000 [7:17:27<3:33:38, 31.20it/s]global step 600000, trans_decision ep_re 252.12378697595494

{"global_step": 600000, "eval_re": [835.6149070063065, 332.5147426039992, 
161.4961191375077, 141.14039740161937, 167.25080666059318, 156.71125418916697, 
157.16442338211561, 204.2722896865019, 213.5306752764757, 151.542254415263], 
"eval_len": [156, 63, 31, 27, 32, 30, 30, 39, 41, 29]}

 61%|██████    | 609999/1000000 [7:24:44<3:22:22, 32.12it/s]global step 610000, trans_decision ep_re 204.62463788977843

{"global_step": 610000, "eval_re": [155.5492479656048, 167.9570267454507, 
204.10742472824074, 183.57081881798436, 151.79121615590225, 503.651175257106, 
187.90108588268072, 177.24637459741973, 146.30288326461184, 168.16912548278324],
"eval_len": [30, 32, 39, 35, 29, 90, 36, 34, 28, 32]}

 62%|██████▏   | 619999/1000000 [7:32:14<3:35:39, 29.37it/s]global step 620000, trans_decision ep_re 248.1892818599565

{"global_step": 620000, "eval_re": [166.60345179239116, 382.4861377437025, 
483.4237308145887, 214.56549115477952, 178.53835238877724, 151.898293694327, 
156.9190499410452, 392.6689390450458, 172.3786735401309, 182.41069848477707], 
"eval_len": [32, 71, 88, 41, 34, 29, 30, 72, 33, 35]}

 63%|██████▎   | 629999/1000000 [7:39:19<3:13:41, 31.84it/s]global step 630000, trans_decision ep_re 171.5152223810146

{"global_step": 630000, "eval_re": [145.92954186372057, 130.74428610008084, 
216.36445068859953, 182.71240509305014, 156.64129110706818, 177.37338876333635, 
162.26775908333312, 203.53860850949437, 161.7598194386215, 177.8206731628415], 
"eval_len": [28, 25, 41, 35, 30, 34, 31, 39, 31, 34]}

 64%|██████▍   | 639996/1000000 [7:47:08<3:07:50, 31.94it/s]global step 640000, trans_decision ep_re 183.37590143261565

{"global_step": 640000, "eval_re": [188.22131802257184, 167.37249353164566, 
141.11067601685227, 171.26139238388822, 168.0153543344605, 177.6813807370032, 
146.39984877138033, 360.5751315197982, 151.92122269655022, 161.2001963120062], 
"eval_len": [36, 32, 27, 33, 32, 34, 28, 68, 29, 31]}

 65%|██████▍   | 649999/1000000 [7:54:35<3:05:23, 31.46it/s]global step 650000, trans_decision ep_re 212.46753454875548

{"global_step": 650000, "eval_re": [177.4618461711268, 167.28174065713728, 
172.58029384742926, 182.0237405863679, 188.84962574757205, 428.3941243420038, 
172.26978990674783, 156.70840295229627, 151.9506297483219, 327.15515152855176], 
"eval_len": [34, 32, 33, 35, 36, 80, 33, 30, 29, 64]}

 66%|██████▌   | 659999/1000000 [8:01:41<2:58:56, 31.67it/s]global step 660000, trans_decision ep_re 185.55305407214456

{"global_step": 660000, "eval_re": [182.89746148481998, 177.3377812362278, 
171.14457317311565, 375.99812398699453, 151.87912659629617, 146.38836154197014, 
162.5117634884641, 162.50425225591923, 173.3676488082459, 151.50144814939208], 
"eval_len": [35, 34, 33, 70, 29, 28, 31, 31, 33, 29]}

 67%|██████▋   | 669999/1000000 [8:08:58<2:53:10, 31.76it/s]global step 670000, trans_decision ep_re 186.42419621402325

{"global_step": 670000, "eval_re": [146.129506874129, 146.55232463916, 
167.8785307415852, 168.09676270681598, 175.92924286382512, 370.1801640013374, 
152.63935629147804, 188.24155552170782, 172.05186168232194, 176.54265681787206],
"eval_len": [28, 28, 32, 32, 34, 69, 29, 36, 33, 34]}

 68%|██████▊   | 679999/1000000 [8:16:15<2:46:48, 31.97it/s]global step 680000, trans_decision ep_re 191.54219788353456

{"global_step": 680000, "eval_re": [214.52003817982782, 146.760763636196, 
178.68954043992582, 253.63921025097008, 166.1975639098928, 141.07771077552547, 
303.0625043472993, 177.48066910758317, 192.55270641392136, 141.44127177420364], 
"eval_len": [41, 28, 34, 48, 32, 27, 60, 34, 37, 27]}

 69%|██████▉   | 689999/1000000 [8:23:45<2:42:15, 31.84it/s]global step 690000, trans_decision ep_re 216.96489454227077

{"global_step": 690000, "eval_re": [172.4657983314813, 156.00853217639485, 
178.4083396344052, 192.65037597315364, 341.1303731195532, 203.31793327496823, 
428.3443309002318, 189.0773729478033, 157.06727938028732, 151.17860968442903], 
"eval_len": [33, 30, 34, 37, 64, 39, 77, 36, 30, 29]}

 70%|██████▉   | 699999/1000000 [8:31:05<2:36:58, 31.85it/s]global step 700000, trans_decision ep_re 230.28865255214822

{"global_step": 700000, "eval_re": [151.48787175966515, 617.3752963070849, 
193.7540508521821, 145.2590497420866, 172.0916625568234, 188.62020405221077, 
162.4008333352902, 162.87397349048032, 310.7158153600549, 198.30776806560368], 
"eval_len": [29, 107, 37, 28, 33, 36, 31, 31, 58, 38]}

 71%|███████   | 709999/1000000 [8:38:10<2:30:59, 32.01it/s]global step 710000, trans_decision ep_re 203.09073687508698

{"global_step": 710000, "eval_re": [157.18741683621494, 161.84773820806487, 
171.2667000287547, 242.77113619712492, 423.37650749788344, 152.06599592293907, 
166.48245344436694, 151.4240264446604, 214.81738999550555, 189.66800417535504], 
"eval_len": [30, 31, 33, 46, 78, 29, 32, 29, 41, 36]}

 72%|███████▏  | 719999/1000000 [8:45:27<2:26:46, 31.79it/s]global step 720000, trans_decision ep_re 165.26715925845465

{"global_step": 720000, "eval_re": [177.1383929795976, 157.14816661770578, 
125.26288643296212, 151.40209730993544, 167.8545953294354, 151.08238948797268, 
162.7899136616243, 194.08933480796844, 146.4128841160462, 219.49093184129865], 
"eval_len": [34, 30, 24, 29, 32, 29, 31, 37, 28, 42]}

 73%|███████▎  | 729999/1000000 [8:52:45<2:22:17, 31.62it/s]global step 730000, trans_decision ep_re 158.06243625841978

{"global_step": 730000, "eval_re": [189.0622264594068, 130.9212068834775, 
166.93434090217423, 162.32819408709628, 157.27079433550696, 156.583246348797, 
145.7549649321098, 146.84291701344355, 146.78694601726522, 178.1395256049207], 
"eval_len": [36, 25, 32, 31, 30, 30, 28, 28, 28, 34]}

 74%|███████▍  | 739997/1000000 [9:00:15<2:15:45, 31.92it/s]global step 740000, trans_decision ep_re 177.51916203525045

{"global_step": 740000, "eval_re": [156.2739684675992, 309.7434513627237, 
192.3011302772626, 135.80328890523356, 167.33794247322996, 182.86078197006228, 
172.10366825947995, 140.37989112876434, 161.81805492175732, 156.56944258639174],
"eval_len": [30, 58, 37, 26, 32, 35, 33, 27, 31, 30]}

 75%|███████▍  | 749999/1000000 [9:07:35<2:10:57, 31.82it/s]global step 750000, trans_decision ep_re 188.55494379796409

{"global_step": 750000, "eval_re": [162.3659066693103, 156.79820851383235, 
161.83207974017319, 191.79191394077307, 167.38521272491926, 162.45942992578932, 
387.885535654793, 171.41599614299616, 177.42447388094902, 146.19068078610536], 
"eval_len": [31, 30, 31, 37, 32, 31, 73, 33, 34, 28]}

 76%|███████▌  | 759999/1000000 [9:14:41<2:05:31, 31.87it/s]global step 760000, trans_decision ep_re 185.6746632419499

{"global_step": 760000, "eval_re": [147.0203323838992, 406.06113272963705, 
156.09783451813277, 130.90927679057702, 152.40320673695638, 176.6482954389888, 
156.7381595757775, 167.31384358925973, 181.32943272485116, 182.2251179314193], 
"eval_len": [28, 75, 30, 25, 29, 34, 30, 32, 35, 35]}

 77%|███████▋  | 769999/1000000 [9:22:00<2:03:40, 31.00it/s]global step 770000, trans_decision ep_re 275.4612629060847

{"global_step": 770000, "eval_re": [406.3273355444708, 223.39563313009492, 
712.1837195970212, 146.5229315055408, 157.06236662587494, 177.12136165218263, 
244.26552461990102, 353.6820313469127, 141.28604185701056, 192.76568318183706], 
"eval_len": [74, 43, 130, 28, 30, 34, 47, 67, 27, 37]}

 78%|███████▊  | 779999/1000000 [9:29:19<1:54:55, 31.91it/s]global step 780000, trans_decision ep_re 280.7189703452374

{"global_step": 780000, "eval_re": [327.3670657358806, 188.94512746533178, 
367.4744463901342, 156.98085959207108, 188.28563513691878, 748.1813474977538, 
225.77011681737585, 161.39721881246805, 178.46228900709448, 264.3255969973454], 
"eval_len": [64, 36, 69, 30, 36, 131, 43, 31, 34, 50]}

 79%|███████▉  | 789999/1000000 [9:36:40<1:50:01, 31.81it/s]global step 790000, trans_decision ep_re 161.50315288885753

{"global_step": 790000, "eval_re": [130.80013220125167, 193.05635163067265, 
151.82055033406684, 141.5696169060364, 151.56273000158114, 172.66728127823177, 
161.44974996835853, 156.5822580464458, 203.7650211228583, 151.7578373990723], 
"eval_len": [25, 37, 29, 27, 29, 33, 31, 30, 39, 29]}

 80%|███████▉  | 799999/1000000 [9:43:59<1:45:49, 31.50it/s]global step 800000, trans_decision ep_re 181.06132242039584

{"global_step": 800000, "eval_re": [152.319355192978, 192.76012249963344, 
152.2775524096283, 175.8553136310976, 172.80023857689673, 289.6659547788853, 
213.92537759122737, 172.40158816283224, 152.20898516495453, 136.39873619582488],
"eval_len": [29, 37, 29, 34, 33, 56, 41, 33, 29, 26]}

 81%|████████  | 809999/1000000 [9:51:25<1:38:40, 32.09it/s]global step 810000, trans_decision ep_re 172.13232006723064

{"global_step": 810000, "eval_re": [221.32730221363673, 182.85294777841835, 
157.25770719008554, 178.3573249485689, 157.16753714386022, 141.28425512687664, 
135.82556018856286, 136.0538012698913, 254.2874522953552, 156.90931251705044], 
"eval_len": [42, 35, 30, 34, 30, 27, 26, 26, 49, 30]}

 82%|████████▏ | 819996/1000000 [9:58:21<1:32:53, 32.30it/s]global step 820000, trans_decision ep_re 172.10140580515193

{"global_step": 820000, "eval_re": [146.6589048851281, 203.8376943837478, 
178.0777470155151, 171.99033241917562, 161.79558674757473, 188.34202073049585, 
156.84918462111776, 156.78984541133207, 151.22884897839796, 205.4438928590344], 
"eval_len": [28, 39, 34, 33, 31, 36, 30, 30, 29, 39]}

 83%|████████▎ | 829996/1000000 [10:06:49<1:27:25, 32.41it/s]global step 830000, trans_decision ep_re 191.03952957809014

{"global_step": 830000, "eval_re": [135.62522146049807, 135.78449954012413, 
219.5307391531579, 194.30819513711052, 318.92587793888396, 146.3326814782105, 
315.7747112530265, 140.8967938096784, 151.53230324100144, 151.68427276921017], 
"eval_len": [26, 26, 42, 37, 61, 28, 60, 27, 29, 29]}

 84%|████████▍ | 839996/1000000 [10:13:44<1:22:26, 32.35it/s]global step 840000, trans_decision ep_re 167.98922925252393

{"global_step": 840000, "eval_re": [162.36231883857502, 157.30393003577387, 
182.87684937650138, 120.29559757203833, 199.40373882022718, 161.97375281396927, 
146.62892655688674, 193.65603314747023, 167.09306898560436, 188.29807637819292],
"eval_len": [31, 30, 35, 23, 38, 31, 28, 37, 32, 36]}

 85%|████████▍ | 849999/1000000 [10:21:09<1:19:31, 31.43it/s]global step 850000, trans_decision ep_re 224.18762169876854

{"global_step": 850000, "eval_re": [177.63003739809304, 130.4143297617229, 
181.85160488004064, 187.20950600360763, 167.82595080457136, 226.10584242072693, 
157.35516808516732, 217.53984878845782, 649.7619839564187, 146.1819448888793], 
"eval_len": [34, 25, 35, 36, 32, 43, 30, 42, 113, 28]}

 86%|████████▌ | 859996/1000000 [10:31:28<1:12:20, 32.25it/s]global step 860000, trans_decision ep_re 235.8312406598885

{"global_step": 860000, "eval_re": [425.7190232023904, 213.93684169481898, 
166.25507516852753, 187.94043811853837, 511.8997910625236, 198.94799399569243, 
168.7212209968884, 171.4065166977345, 161.46468649061688, 152.0208191711542], 
"eval_len": [77, 41, 32, 36, 93, 38, 32, 33, 31, 29]}

 87%|████████▋ | 869996/1000000 [10:38:38<1:06:52, 32.40it/s]global step 870000, trans_decision ep_re 186.4220655791543

{"global_step": 870000, "eval_re": [130.80479745679153, 278.23114720994175, 
152.27044939107248, 156.33058536359152, 146.44745918904428, 211.05510654143185, 
182.4301450675656, 171.96370281065552, 146.88874629761918, 287.7985164638291], 
"eval_len": [25, 53, 29, 30, 28, 40, 35, 33, 28, 56]}

 88%|████████▊ | 879999/1000000 [10:46:07<1:19:08, 25.27it/s]global step 880000, trans_decision ep_re 221.58897411588077

{"global_step": 880000, "eval_re": [182.95036964773197, 146.6297877433858, 
146.71601599451097, 130.9540380511972, 161.9481047783409, 809.8528578619846, 
209.02858267046355, 125.69029458252773, 120.28853415255148, 181.83115567611347],
"eval_len": [35, 28, 28, 25, 31, 155, 40, 24, 23, 35]}

 89%|████████▉ | 889996/1000000 [10:53:57<1:32:54, 19.73it/s]global step 890000, trans_decision ep_re 224.2747835567614

{"global_step": 890000, "eval_re": [146.42989742134623, 414.9382000893667, 
157.1081419704437, 182.4970793786809, 203.9380252225558, 183.02152438155372, 
395.3104273853361, 141.2081000226007, 214.17375387711655, 204.12268581861363], 
"eval_len": [28, 77, 30, 35, 39, 35, 74, 27, 41, 39]}

 90%|████████▉ | 899996/1000000 [11:01:07<54:37, 30.52it/s]global step 900000, trans_decision ep_re 177.7195288503454

{"global_step": 900000, "eval_re": [161.49031301569067, 161.65955778975538, 
188.21405672180128, 184.2265555701816, 176.11546765313233, 162.0341905626267, 
141.67798656875956, 182.85585869229593, 167.4273769759042, 251.49392495330625], 
"eval_len": [31, 31, 36, 35, 34, 31, 27, 35, 32, 48]}

 91%|█████████ | 909996/1000000 [11:08:20<46:12, 32.47it/s]global step 910000, trans_decision ep_re 239.0755557147034

{"global_step": 910000, "eval_re": [172.88894182568015, 177.11114396640485, 
157.61852916662045, 657.5533642105712, 157.18581617433097, 181.9332093520721, 
172.4608810973262, 219.5389727158626, 182.98227450081035, 311.4824241373548], 
"eval_len": [33, 34, 30, 117, 30, 35, 33, 42, 35, 60]}

 92%|█████████▏| 919996/1000000 [11:15:45<41:30, 32.13it/s]global step 920000, trans_decision ep_re 230.46508525704485

{"global_step": 920000, "eval_re": [177.44126802131552, 609.3415310899393, 
283.9395481986966, 173.49302090802757, 191.74874740135022, 167.64060214243156, 
172.66029974084736, 177.7795434775067, 167.61939353080967, 182.98689805952387], 
"eval_len": [34, 106, 54, 33, 37, 32, 33, 34, 32, 35]}

 93%|█████████▎| 929996/1000000 [11:22:42<36:03, 32.35it/s]global step 930000, trans_decision ep_re 210.04338357114716

{"global_step": 930000, "eval_re": [172.8235576802593, 141.67896269143546, 
208.03302029302483, 172.27451416033986, 172.63517856095729, 172.28367151964804, 
145.7364478181787, 522.2588530741757, 209.92574096983614, 182.78388894361623], 
"eval_len": [33, 27, 40, 33, 33, 33, 28, 92, 40, 35]}

 94%|█████████▍| 939996/1000000 [11:30:05<30:38, 32.64it/s]global step 940000, trans_decision ep_re 182.5380425863212

{"global_step": 940000, "eval_re": [182.94535689887607, 157.67475589386416, 
172.62030289107258, 146.42979323978747, 223.56157521487876, 162.2391780486525, 
340.72976436298507, 130.7602662650443, 166.80244227146144, 141.61699077659006], 
"eval_len": [35, 30, 33, 28, 43, 31, 64, 25, 32, 27]}

 95%|█████████▍| 949996/1000000 [11:37:15<25:29, 32.69it/s]global step 950000, trans_decision ep_re 190.7162539886141

{"global_step": 950000, "eval_re": [263.97061695798266, 177.76545357969331, 
140.97885396302988, 136.11025619067496, 304.3392380260888, 135.79367135762288, 
166.5822548433062, 157.08419578142315, 293.7800788292772, 130.75792035704197], 
"eval_len": [50, 34, 27, 26, 57, 26, 32, 30, 55, 25]}

 96%|█████████▌| 959996/1000000 [11:46:15<20:40, 32.24it/s]global step 960000, trans_decision ep_re 170.07427508824367

{"global_step": 960000, "eval_re": [147.10697618274605, 162.2364142687531, 
177.5198294170416, 188.53913226043176, 136.16855861723127, 172.2768251000915, 
183.51880762329557, 230.54972824948686, 135.4522996389921, 167.37417952436675], 
"eval_len": [28, 31, 34, 36, 26, 33, 35, 44, 26, 32]}

 97%|█████████▋| 969999/1000000 [11:58:45<15:26, 32.40it/s]global step 970000, trans_decision ep_re 222.31380476296584

{"global_step": 970000, "eval_re": [141.80608502962687, 178.16977355295552, 
178.19174811616054, 193.4643530149365, 184.01291597129958, 146.61262783046382, 
692.2557658619006, 167.18880704326466, 172.8563635302151, 168.5796076788352], 
"eval_len": [27, 34, 34, 37, 35, 28, 127, 32, 33, 32]}

 98%|█████████▊| 979999/1000000 [12:06:08<10:22, 32.14it/s]global step 980000, trans_decision ep_re 189.39528054723542

{"global_step": 980000, "eval_re": [171.79635426123295, 161.5786160508817, 
172.3456020465631, 213.5072861350791, 193.07313465373915, 141.20067409479068, 
286.9129169100309, 146.25810323947115, 250.62201023356695, 156.65810784699846], 
"eval_len": [33, 31, 33, 41, 37, 27, 54, 28, 48, 30]}

 99%|█████████▉| 989996/1000000 [12:13:04<05:08, 32.43it/s]global step 990000, trans_decision ep_re 205.84325623031705

{"global_step": 990000, "eval_re": [178.01141778164643, 219.99227597580142, 
202.76777184546162, 364.98991080941494, 163.0268173639879, 181.27104755508705, 
172.4077135514739, 230.77208002085035, 187.73490240630022, 157.45862499314654], 
"eval_len": [34, 42, 39, 69, 31, 35, 33, 44, 36, 30]}

100%|█████████▉| 999999/1000000 [12:20:12<00:00, 32.17it/s]global step 1000000, trans_decision ep_re 203.8888758106833

{"global_step": 1000000, "eval_re": [193.6237636492438, 176.64959827406625, 
213.962128810734, 437.1083833166262, 131.06787887294487, 187.3617116909727, 
151.80846823660852, 151.02281424142097, 161.75329832429478, 234.5307126899208], 
"eval_len": [37, 34, 41, 81, 25, 36, 29, 29, 31, 45]}

100%|██████████| 1000000/1000000 [12:20:25<00:00, 22.51it/s]
