
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'ExtremeSparseL4U32::markov(4, 32, [[249, 1], [1, 31]])'
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9998/1000000 [03:30<8:00:55, 34.31it/s]global step 10000, trans_decision ep_re 159.6071021552522

{"global_step": 10000, "eval_re": [202.52153973953634, 14.385162263341062, 
241.31741683935368, 19.732863765867602, 25.725648828933778, 25.228210129613174, 
22.399256987170677, 14.001950102944008, 1011.4934272883077, 19.26554560745402], 
"eval_len": [117, 25, 143, 36, 35, 36, 35, 26, 1000, 30]}

  2%|▏         | 19996/1000000 [09:54<7:46:38, 35.00it/s]global step 20000, trans_decision ep_re 45.2936679453614

{"global_step": 20000, "eval_re": [38.66305364791604, 25.798202136687603, 
118.09016456353575, 116.17588280940146, 27.578727747493062, 21.371633292486536, 
18.240697261842488, 24.796783832472542, 22.384994011197826, 39.83654015058067], 
"eval_len": [42, 35, 91, 94, 39, 39, 31, 37, 35, 45]}

  3%|▎         | 29996/1000000 [16:24<7:42:40, 34.94it/s]global step 30000, trans_decision ep_re 58.137350147333926

{"global_step": 30000, "eval_re": [92.47606381984478, 86.20372294906676, 
15.970842992914463, 58.840340761176584, 17.978271479346972, 75.34445868501376, 
58.936852532151306, -7.837642756393888, 76.37751453305923, 107.08307647715924], 
"eval_len": [170, 129, 29, 81, 34, 147, 70, 155, 95, 113]}

  4%|▍         | 39996/1000000 [22:57<7:40:47, 34.72it/s]global step 40000, trans_decision ep_re 71.46964112826207

{"global_step": 40000, "eval_re": [20.697796848993228, 179.89239827352483, 
243.05878959848138, 16.282738972992842, 17.061581872623876, 17.800234796714115, 
21.37267250034432, 25.123173720524488, 158.64379091938466, 14.763233779036995], 
"eval_len": [32, 152, 134, 28, 29, 31, 33, 39, 123, 26]}

  5%|▍         | 49996/1000000 [29:40<7:37:38, 34.60it/s]global step 50000, trans_decision ep_re 105.59827309970072

{"global_step": 50000, "eval_re": [24.757564236105377, 200.5283497594599, 
111.85487489993372, 130.53500116984358, 23.4258230124361, 22.678598034170825, 
36.75535287850413, 287.56977932426344, 25.626119880891626, 192.25126780139834], 
"eval_len": [34, 152, 224, 88, 34, 35, 51, 134, 37, 120]}

  6%|▌         | 59996/1000000 [36:10<7:32:35, 34.62it/s]global step 60000, trans_decision ep_re 83.42053512628792

{"global_step": 60000, "eval_re": [32.90211428666188, 18.109701965944197, 
176.57871332769284, 193.83030313829613, 20.849058710610908, 245.2803745842615, 
33.43762717687432, 32.290929165561295, 37.54625742016159, 43.38027148681438], 
"eval_len": [51, 29, 121, 141, 33, 140, 52, 48, 49, 50]}

  7%|▋         | 69996/1000000 [42:30<7:28:14, 34.58it/s]global step 70000, trans_decision ep_re 274.953239699878

{"global_step": 70000, "eval_re": [936.7830155436667, 366.5402593652192, 
430.1059717295305, 21.87180747979283, 192.49939842116726, 44.01280911047852, 
311.15653767959725, 27.754522196778492, 35.118155890937864, 383.68991958161126],
"eval_len": [586, 273, 244, 36, 140, 50, 168, 36, 49, 222]}

  8%|▊         | 79996/1000000 [49:04<7:20:03, 34.84it/s]global step 80000, trans_decision ep_re 203.96106509121523

{"global_step": 80000, "eval_re": [255.8797189303823, 398.45744968229747, 
20.910264607837238, 132.157999658353, 57.54325805019533, 199.42289226274357, 
17.62303587821841, 15.443225858112303, 592.0582036816329, 350.1146023023799], 
"eval_len": [165, 171, 37, 98, 87, 215, 27, 33, 277, 167]}

  9%|▉         | 89996/1000000 [55:37<7:16:33, 34.74it/s]global step 90000, trans_decision ep_re 52.141310982295806

{"global_step": 90000, "eval_re": [80.1345174504726, 66.95333029726105, 
35.38535816444605, 23.6000146158425, 73.479931274983, 24.582992375273562, 
62.27343305942159, 23.882943982917997, 69.62336958549908, 61.49721901684065], 
"eval_len": [77, 72, 49, 37, 79, 33, 71, 33, 68, 71]}

 10%|▉         | 99996/1000000 [1:02:07<7:13:31, 34.60it/s]global step 100000, trans_decision ep_re 59.59264866866787

{"global_step": 100000, "eval_re": [231.19382920781257, 28.878925072852642, 
32.32268704202424, 26.524837316085424, 16.126997004517715, 57.051035472352126, 
20.423121425414838, 96.06172229232844, 24.16689962245274, 63.17643223083794], 
"eval_len": [118, 48, 49, 35, 27, 77, 37, 127, 34, 96]}

 11%|█         | 109996/1000000 [1:08:50<7:07:13, 34.72it/s]global step 110000, trans_decision ep_re 93.3591895488776

{"global_step": 110000, "eval_re": [84.31273451390084, 33.107729704842015, 
106.95443790043656, 249.1322932277843, 89.03396231778362, 15.944688673640044, 
23.737181267201176, 196.6402911118313, 47.87131173943489, 86.85726503192129], 
"eval_len": [93, 50, 89, 141, 82, 28, 37, 155, 210, 82]}

 12%|█▏        | 119996/1000000 [1:15:20<7:04:00, 34.59it/s]global step 120000, trans_decision ep_re 55.64128541747333

{"global_step": 120000, "eval_re": [80.67450175977889, 40.80048903609209, 
45.42366184145763, 36.90574506818053, 110.69490705420236, 108.58190295651119, 
66.34307215977786, 21.742824913002657, 23.28782063630628, 21.95792874942384], 
"eval_len": [85, 48, 96, 49, 93, 96, 83, 33, 33, 36]}

 13%|█▎        | 129996/1000000 [1:21:50<6:56:50, 34.79it/s]global step 130000, trans_decision ep_re 145.3917619658829

{"global_step": 130000, "eval_re": [145.38041849134927, 142.75160416771737, 
25.263755983390613, 20.42621844502495, 46.77593457757919, 315.8225547980399, 
619.7121358057857, 13.500889823462362, 33.64179311855074, 90.64231444792885], 
"eval_len": [106, 114, 39, 38, 53, 176, 208, 24, 50, 93]}

 14%|█▍        | 139996/1000000 [1:28:10<6:52:53, 34.71it/s]global step 140000, trans_decision ep_re 150.55434420671943

{"global_step": 140000, "eval_re": [396.4956679739202, 490.1789920555231, 
25.884636645516572, 165.34586649846202, 90.13167326222093, 27.357392786397437, 
121.19033806856241, 87.70251324913384, 18.851454597515296, 82.40490692994233], 
"eval_len": [181, 229, 37, 123, 88, 37, 95, 94, 36, 88]}

 15%|█▍        | 149996/1000000 [1:34:42<6:48:05, 34.71it/s]global step 150000, trans_decision ep_re 72.20714805604706

{"global_step": 150000, "eval_re": [93.8603135902239, 22.947030923574005, 
27.300379295981465, 18.231345797604288, 21.95851296031187, 55.16766382037878, 
25.400033014066025, 320.9095900397682, 51.02138998201328, 85.27522113654881], 
"eval_len": [85, 36, 39, 35, 35, 60, 35, 201, 59, 80]}

 16%|█▌        | 159996/1000000 [1:41:12<6:43:21, 34.71it/s]global step 160000, trans_decision ep_re 79.76239604119628

{"global_step": 160000, "eval_re": [103.13249697580778, 380.49490846084836, 
79.48940280329909, 19.17350588014864, 51.99135839711456, 13.24595065270155, 
53.00764726371316, 20.711346035666487, 56.21722443771523, 20.160119504947946], 
"eval_len": [93, 196, 83, 36, 58, 24, 58, 34, 58, 35]}

 17%|█▋        | 169996/1000000 [1:47:41<6:34:09, 35.10it/s]global step 170000, trans_decision ep_re 177.26856422273207

{"global_step": 170000, "eval_re": [212.59798969488867, 298.2472319512719, 
195.15548123575226, 19.04678704372792, 531.816216654564, 160.92427061964997, 
155.87549938713963, 50.60699667752021, 126.68741196924869, 21.727756993557378], 
"eval_len": [123, 169, 119, 35, 217, 106, 101, 56, 85, 37]}

 18%|█▊        | 179996/1000000 [1:54:12<6:34:12, 34.67it/s]global step 180000, trans_decision ep_re 160.08737498830595

{"global_step": 180000, "eval_re": [83.24541488975206, 29.25890849906145, 
225.39210050982945, 702.3193206297027, 76.20591883182809, 99.48769014416669, 
252.14750648340373, 83.56266208206979, 23.888646775790534, 25.365581037455044], 
"eval_len": [79, 38, 132, 283, 78, 83, 134, 81, 35, 36]}

 19%|█▉        | 189996/1000000 [2:00:41<6:27:31, 34.84it/s]global step 190000, trans_decision ep_re 327.3411970934119

{"global_step": 190000, "eval_re": [25.504326428765726, 41.07746479668106, 
25.537430412078212, 985.8113489214182, 716.5822607623825, 92.36753859299134, 
35.583054344378425, 294.2202310741482, 818.4614774327222, 238.26683816855282], 
"eval_len": [35, 50, 38, 367, 253, 81, 49, 154, 296, 133]}

 20%|█▉        | 199996/1000000 [2:07:13<6:21:51, 34.92it/s]global step 200000, trans_decision ep_re 121.05301948032402

{"global_step": 200000, "eval_re": [12.933960063217675, 395.56707381545243, 
22.32072279011367, 21.05325982953355, 21.023153077532644, 44.33847843775582, 
25.772974288707083, 47.907351022671946, 20.687961370617295, 598.9252601076381], 
"eval_len": [23, 173, 32, 36, 33, 56, 37, 61, 37, 216]}

 21%|██        | 209996/1000000 [2:13:43<6:14:58, 35.11it/s]global step 210000, trans_decision ep_re 80.24515209098045

{"global_step": 210000, "eval_re": [143.23679912344147, 24.06739677191128, 
121.60310354461072, 39.69206094298772, 21.12703609406731, 93.6263901067006, 
173.44466978146764, 114.274196426313, 21.977420385204283, 49.402447733100544], 
"eval_len": [108, 35, 97, 55, 36, 78, 115, 78, 34, 56]}

 22%|██▏       | 219996/1000000 [2:20:12<6:10:56, 35.05it/s]global step 220000, trans_decision ep_re 56.087936072075856

{"global_step": 220000, "eval_re": [35.22158071657642, 60.85115498633935, 
24.45026072044191, 65.70946384839694, 55.02668087076433, 68.49565834800714, 
52.95959816591292, 62.77909916697564, 89.35115651723298, 46.03470738011093], 
"eval_len": [52, 64, 37, 68, 67, 75, 62, 67, 72, 54]}

 23%|██▎       | 229996/1000000 [2:26:42<6:09:36, 34.72it/s]global step 230000, trans_decision ep_re 104.66460874860135

{"global_step": 230000, "eval_re": [49.03292829290626, 46.032853003442845, 
263.4474032459227, 20.142794726945965, 52.595194687700406, 319.4847081674681, 
91.01892431764757, 90.35065082765348, 27.158669922179904, 87.38196029414621], 
"eval_len": [53, 58, 147, 34, 59, 158, 79, 85, 38, 85]}

 24%|██▍       | 239996/1000000 [2:33:11<5:59:29, 35.24it/s]global step 240000, trans_decision ep_re 116.75208059875283

{"global_step": 240000, "eval_re": [439.423889412637, 24.10173434925461, 
276.00650474084887, 26.81273954581527, 23.164387972069832, 27.760907813160173, 
49.92269321668133, 236.45268589160617, 36.996781535636885, 26.878481509818023], 
"eval_len": [177, 34, 160, 36, 34, 38, 62, 153, 48, 38]}

 25%|██▍       | 249996/1000000 [2:39:41<5:58:33, 34.86it/s]global step 250000, trans_decision ep_re 186.90351963941907

{"global_step": 250000, "eval_re": [198.45187706776161, 148.11140325399393, 
517.9422664815474, 24.040432100274415, 293.90359570282584, 24.18057305793347, 
531.2163878207248, 25.778391839080914, 17.623715584787043, 87.78655348526115], 
"eval_len": [140, 89, 196, 37, 142, 35, 213, 39, 35, 84]}

 26%|██▌       | 259996/1000000 [2:46:11<5:54:04, 34.83it/s]global step 260000, trans_decision ep_re 55.624341709979966

{"global_step": 260000, "eval_re": [23.224081542219384, 62.08566652043742, 
88.07207042123305, 19.691931094145524, 24.237549524427823, 20.678537588673272, 
74.88201712854324, 93.85998752689342, 133.5071235233024, 16.004452229924105], 
"eval_len": [35, 64, 81, 30, 36, 30, 67, 75, 97, 30]}

 27%|██▋       | 269996/1000000 [2:52:40<5:45:53, 35.18it/s]global step 270000, trans_decision ep_re 94.89503471922487

{"global_step": 270000, "eval_re": [23.502697010439014, 345.55024170999997, 
87.77233206832366, 19.04445119030529, 50.91883431089454, 172.09889485620874, 
71.09008466615052, 92.46344546054368, 60.46470387646504, 26.044662042918112], 
"eval_len": [38, 167, 79, 38, 59, 115, 67, 92, 70, 38]}

 28%|██▊       | 279996/1000000 [2:59:10<5:43:48, 34.90it/s]global step 280000, trans_decision ep_re 68.35037179146853

{"global_step": 280000, "eval_re": [140.32541126993385, 65.9039325854486, 
144.15367426845913, 126.43875123966234, 74.91681982160756, 23.980085620687113, 
13.605731664301631, 16.476939427354697, 50.45629287935256, 27.24607913787788], 
"eval_len": [99, 71, 109, 98, 76, 38, 25, 28, 60, 38]}

 29%|██▉       | 289996/1000000 [3:05:50<5:32:06, 35.63it/s]global step 290000, trans_decision ep_re 132.91158784096734

{"global_step": 290000, "eval_re": [253.15718875662128, 363.82802995106954, 
22.68798315437017, 22.421374779367316, 23.298614862110306, 207.03024431040072, 
22.514563494805547, 16.11226813454151, 19.465237113052694, 378.60037385333436], 
"eval_len": [134, 153, 38, 39, 33, 112, 35, 27, 29, 185]}

 30%|██▉       | 299996/1000000 [3:12:20<5:35:34, 34.77it/s]global step 300000, trans_decision ep_re 118.18987155087575

{"global_step": 300000, "eval_re": [74.76312778282542, 20.163861777019, 
21.35161617575187, 47.687028623162625, 16.680492768119294, 25.280599440644462, 
69.33941810281753, 17.07362838747675, 325.78182706253506, 563.7771153884056], 
"eval_len": [66, 33, 32, 54, 34, 36, 69, 31, 168, 249]}

 31%|███       | 309996/1000000 [3:18:50<5:29:24, 34.91it/s]global step 310000, trans_decision ep_re 85.51051937894664

{"global_step": 310000, "eval_re": [555.188355644153, 22.694200243615068, 
17.62927402909928, 100.85280001236998, 20.055124383887826, 18.841435976220698, 
52.81302557818395, 22.268222073807973, 25.83050603241876, 18.932249815709852], 
"eval_len": [241, 33, 28, 87, 32, 30, 62, 35, 37, 33]}

 32%|███▏      | 319996/1000000 [3:25:20<5:27:01, 34.66it/s]global step 320000, trans_decision ep_re 95.85374642647577

{"global_step": 320000, "eval_re": [24.562586800810063, 16.8218346607048, 
22.307106409299852, 23.42618481375824, 24.301134227972604, 71.25941390148311, 
667.743873254539, 65.39422341732174, 23.027507907020667, 19.693598871847502], 
"eval_len": [37, 27, 34, 36, 39, 66, 220, 64, 35, 37]}

 33%|███▎      | 329996/1000000 [3:31:50<5:25:35, 34.30it/s]global step 330000, trans_decision ep_re 254.65655773763493

{"global_step": 330000, "eval_re": [514.879002368053, 607.8447378744287, 
20.605593368715418, 684.3550036809493, 26.826181682807196, 74.40712829737465, 
23.91079675224856, 58.13262872018264, 407.8107857655168, 127.79371886607326], 
"eval_len": [221, 209, 32, 233, 39, 75, 35, 60, 167, 96]}

 34%|███▍      | 339996/1000000 [3:38:11<5:17:10, 34.68it/s]global step 340000, trans_decision ep_re 97.79320055017236

{"global_step": 340000, "eval_re": [60.76490376316393, 552.2375067109384, 
68.38941606711464, 18.259142566137022, 22.173705455280604, 15.9722382494412, 
20.1162389465537, 145.80535065399016, 55.21990140855321, 18.993601680550558], 
"eval_len": [66, 212, 65, 31, 34, 27, 30, 107, 59, 33]}

 35%|███▍      | 349996/1000000 [3:44:40<5:09:07, 35.05it/s]global step 350000, trans_decision ep_re 143.33649556185952

{"global_step": 350000, "eval_re": [161.1109442282167, 205.88245989267702, 
26.828840317802978, 99.46918511071449, 19.461919671499615, 106.18954428680075, 
366.04838621004734, 139.63742428454952, 39.30496465723219, 269.4312869590547], 
"eval_len": [108, 120, 37, 81, 31, 89, 167, 96, 52, 134]}

 36%|███▌      | 359996/1000000 [3:51:10<5:05:43, 34.89it/s]global step 360000, trans_decision ep_re 85.99106654733957

{"global_step": 360000, "eval_re": [555.5386430529485, 23.309812688499242, 
26.432513036736413, 35.96513255253764, 16.846651758655927, 25.465214880418706, 
108.94889935164385, 22.731414812284278, 20.65447607161887, 24.017907268052227], 
"eval_len": [256, 32, 39, 45, 35, 39, 115, 38, 32, 39]}

 37%|███▋      | 369996/1000000 [3:57:50<5:01:01, 34.88it/s]global step 370000, trans_decision ep_re 72.17556559007912

{"global_step": 370000, "eval_re": [19.36980342938316, 83.68076489714747, 
53.373830445464485, 17.258983853236202, 160.28784478413456, 49.2267696718344, 
18.705013283562792, 17.677753190764246, 90.2500749046912, 211.9248174405726], 
"eval_len": [29, 71, 61, 35, 99, 58, 29, 27, 76, 125]}

 38%|███▊      | 379996/1000000 [4:04:20<4:56:00, 34.91it/s]global step 380000, trans_decision ep_re 172.22993624854183

{"global_step": 380000, "eval_re": [361.18929970267016, 22.848926707010463, 
28.453683811702362, 25.18982679856738, 22.689102792552692, 474.4204471227579, 
19.99461092181959, 18.360875770210278, 286.8130612248939, 462.33952763323333], 
"eval_len": [160, 35, 39, 38, 39, 174, 35, 27, 140, 174]}

 39%|███▉      | 389996/1000000 [4:10:50<4:50:56, 34.94it/s]global step 390000, trans_decision ep_re 155.1514486672939

{"global_step": 390000, "eval_re": [503.7436929966311, 106.83179006592736, 
16.826266154164365, 27.106286084007138, 24.579279586374547, 120.4137740355701, 
24.05069704209603, 20.524448087290722, 366.3149682864792, 341.1232843343982], 
"eval_len": [211, 83, 29, 36, 33, 87, 32, 30, 165, 151]}

 40%|███▉      | 399996/1000000 [4:17:20<4:48:09, 34.70it/s]global step 400000, trans_decision ep_re 150.1270588788212

{"global_step": 400000, "eval_re": [21.45728129759901, 194.5352190733424, 
37.636754717562944, 135.93901006499303, 476.52656376222774, 340.38431327223935, 
18.69639994656665, 227.08262677654557, 25.133730251021753, 23.878689626113644], 
"eval_len": [36, 136, 44, 108, 193, 177, 29, 133, 36, 39]}

 41%|████      | 409996/1000000 [4:23:38<4:41:37, 34.92it/s]global step 410000, trans_decision ep_re 49.185036840753995

{"global_step": 410000, "eval_re": [34.27187488083133, 78.32033402232456, 
73.95988923420289, 23.793317127755696, 66.94382722494049, 69.91402591135562, 
72.38039435641313, 18.242596570987125, 26.55902093333619, 27.465088145392983], 
"eval_len": [48, 80, 75, 35, 72, 73, 72, 28, 38, 47]}

 42%|████▏     | 419996/1000000 [4:30:07<4:39:06, 34.64it/s]global step 420000, trans_decision ep_re 110.81313000747059

{"global_step": 420000, "eval_re": [20.8005172473983, 21.838917520992528, 
30.484854111072796, 106.46709427875768, 23.38116049547339, 300.05062229062054, 
392.8952159886507, 162.4878996506333, 24.373500001199954, 25.35151848990645], 
"eval_len": [31, 39, 38, 84, 40, 148, 151, 119, 38, 36]}

 43%|████▎     | 429996/1000000 [4:36:36<4:35:00, 34.55it/s]global step 430000, trans_decision ep_re 28.86275794223156

{"global_step": 430000, "eval_re": [25.408916439075597, 24.211390071386457, 
20.66525309518839, 30.062106756629596, 23.13236273313242, 17.906944266719933, 
24.164146083934035, 23.498446551621406, 73.8554692902111, 25.72254413441665], 
"eval_len": [38, 35, 34, 47, 35, 32, 34, 37, 69, 38]}

 44%|████▍     | 439996/1000000 [4:43:04<4:27:06, 34.94it/s]global step 440000, trans_decision ep_re 98.24428598460256

{"global_step": 440000, "eval_re": [22.46938141273765, 21.658119977068058, 
52.90584931281549, 20.58580971867992, 41.26774677619634, 25.67083094689296, 
60.57492121961024, 25.29871499696475, 15.13700142377546, 696.8744840612846], 
"eval_len": [32, 36, 60, 34, 46, 45, 62, 37, 25, 224]}

 45%|████▍     | 449996/1000000 [4:49:33<4:21:38, 35.04it/s]global step 450000, trans_decision ep_re 75.12060986607071

{"global_step": 450000, "eval_re": [23.568675391601964, 42.21971446516763, 
22.505873670304062, 20.58131555213619, 116.49227357089865, 409.80445672635227, 
33.03928036777381, 34.01400751229193, 21.753923401978966, 27.22657800220169], 
"eval_len": [36, 49, 35, 36, 98, 180, 45, 44, 39, 37]}

 46%|████▌     | 459996/1000000 [4:56:03<4:18:20, 34.84it/s]global step 460000, trans_decision ep_re 86.04886195445165

{"global_step": 460000, "eval_re": [53.86881376748204, 69.17684149064974, 
462.59001496345235, 57.802506660245896, 62.05453697768178, 21.821344705918932, 
27.601564803216263, 12.841560805290634, 23.28137914663943, 69.45005622393944], 
"eval_len": [62, 69, 209, 63, 68, 34, 35, 27, 34, 68]}

 47%|████▋     | 469996/1000000 [5:02:32<4:14:09, 34.76it/s]global step 470000, trans_decision ep_re 72.83794475677429

{"global_step": 470000, "eval_re": [35.1935117782964, 352.7527775349941, 
18.08936970279665, 27.563728611584818, 35.91891672149252, 34.055441807938536, 
22.202340585333282, 159.77772480489284, 19.39187756801628, 23.433758452397402], 
"eval_len": [44, 153, 32, 38, 45, 43, 41, 97, 33, 35]}

 48%|████▊     | 479996/1000000 [5:09:01<4:08:14, 34.91it/s]global step 480000, trans_decision ep_re 182.41806647473652

{"global_step": 480000, "eval_re": [77.73744503053811, 15.542475274355088, 
208.57974293482093, 30.510250148280356, 18.72891077695431, 561.5810594208559, 
14.181885111563876, 106.97566542684261, 566.4473964490267, 223.89583417412737], 
"eval_len": [69, 29, 117, 40, 36, 261, 25, 83, 210, 128]}

 49%|████▉     | 489996/1000000 [5:15:32<4:04:30, 34.76it/s]global step 490000, trans_decision ep_re 189.61251709934487

{"global_step": 490000, "eval_re": [107.67749909393655, 95.16070288834334, 
343.30350696704915, 92.54812552323789, 182.47040446439377, 634.4017894546887, 
74.46253224552288, 31.94807367493584, 26.344838090401694, 307.80769859093897], 
"eval_len": [84, 80, 183, 126, 120, 245, 76, 48, 37, 151]}

 50%|████▉     | 499996/1000000 [5:22:04<3:58:24, 34.95it/s]global step 500000, trans_decision ep_re 101.10263916979588

{"global_step": 500000, "eval_re": [450.9618977920524, 254.71773060154274, 
130.57799072460335, 25.25337682068011, 22.521379271618464, 29.795725905829773, 
24.013719887787904, 24.596897261760027, 25.326091744419124, 23.261581687664684],
"eval_len": [163, 121, 88, 38, 35, 41, 39, 36, 36, 36]}

 51%|█████     | 509996/1000000 [5:28:33<3:54:07, 34.88it/s]global step 510000, trans_decision ep_re 29.81136116174548

{"global_step": 510000, "eval_re": [17.80837924094449, 25.724775693311436, 
23.730039202869406, 27.14262400572229, 72.37321722020955, 22.72910569785076, 
28.559163993611776, 18.974684127865217, 33.856475315413554, 27.215147119656308],
"eval_len": [39, 37, 35, 39, 67, 33, 38, 35, 45, 41]}

 52%|█████▏    | 519996/1000000 [5:35:01<3:50:22, 34.73it/s]global step 520000, trans_decision ep_re 94.84929242366562

{"global_step": 520000, "eval_re": [55.70106934987119, 17.56933000783564, 
23.917525440159253, 330.117475035431, 21.126707670862125, 12.877508102200133, 
313.119419890621, 130.53291302314034, 17.39472316249237, 26.136252554043168], 
"eval_len": [59, 36, 37, 181, 38, 24, 151, 108, 29, 38]}

 53%|█████▎    | 529996/1000000 [5:41:40<3:44:03, 34.96it/s]global step 530000, trans_decision ep_re 61.240251572299506

{"global_step": 530000, "eval_re": [14.149184090444896, 25.943914519610285, 
92.21679582157535, 36.698217841173424, 316.795233586307, 16.646505418910696, 
21.215889317732845, 22.445805581660807, 47.532260054594325, 18.758709490985606],
"eval_len": [25, 37, 82, 48, 144, 27, 35, 35, 47, 38]}

 54%|█████▍    | 539996/1000000 [5:47:53<3:31:37, 36.23it/s]global step 540000, trans_decision ep_re 32.309350191182446

{"global_step": 540000, "eval_re": [44.71798033476409, 26.656489958197703, 
16.002735323478248, 25.518994689657905, 21.78368901273843, 26.11399915932204, 
22.653874121823986, 41.95552295262976, 46.0764777408726, 51.61373861833972], 
"eval_len": [60, 39, 28, 37, 38, 37, 33, 46, 48, 60]}

 55%|█████▍    | 549996/1000000 [5:54:20<3:29:18, 35.83it/s]global step 550000, trans_decision ep_re 188.60195285829366

{"global_step": 550000, "eval_re": [420.57440242074, 387.2327284696784, 
20.160847159449876, 60.74394679033423, 28.840950481382997, 399.61795460402976, 
25.984018719657442, 20.123016748773562, 17.524542873514296, 505.2171203153759], 
"eval_len": [173, 254, 36, 65, 38, 185, 36, 31, 29, 215]}

 56%|█████▌    | 559996/1000000 [6:00:24<3:23:08, 36.10it/s]global step 560000, trans_decision ep_re 181.159516794229

{"global_step": 560000, "eval_re": [44.46266863772196, 494.8417671678171, 
22.265322431492738, 27.21216626454194, 101.86107468920359, 349.45104093083046, 
362.593041281167, 157.99931523689438, 31.347196827976816, 219.5615744746441], 
"eval_len": [48, 209, 33, 38, 111, 184, 180, 90, 37, 128]}

 57%|█████▋    | 569996/1000000 [6:06:41<3:18:01, 36.19it/s]global step 570000, trans_decision ep_re 136.69861513588503

{"global_step": 570000, "eval_re": [20.834315490262707, 153.73791105670583, 
436.7283890161251, 97.17271321303055, 96.46464281651721, 17.293316108057546, 
25.143878691461936, 16.840963476538256, 131.44815261525008, 371.3218688749011], 
"eval_len": [34, 133, 181, 84, 81, 30, 33, 31, 123, 197]}

 58%|█████▊    | 579996/1000000 [6:12:55<3:12:52, 36.29it/s]global step 580000, trans_decision ep_re 97.13797903520263

{"global_step": 580000, "eval_re": [727.8476809859819, 26.037746257962528, 
18.313010079977033, 38.398126237793356, 28.138213801814956, 39.22271692374423, 
19.162346399065264, 43.28339702920445, 20.21374947200423, 10.762803164478353], 
"eval_len": [253, 36, 39, 45, 37, 45, 30, 48, 39, 26]}

 59%|█████▉    | 589996/1000000 [6:19:07<3:09:41, 36.02it/s]global step 590000, trans_decision ep_re 25.846978959031503

{"global_step": 590000, "eval_re": [20.899668566217372, 30.910699277259535, 
24.67728441211767, 22.327256251903375, 38.70589966209704, 29.742765164803977, 
23.01902239801462, 32.55558042232415, 15.94265522320886, 19.688958212368377], 
"eval_len": [36, 39, 38, 37, 49, 39, 37, 41, 27, 31]}

 60%|█████▉    | 599996/1000000 [6:25:18<3:03:22, 36.35it/s]global step 600000, trans_decision ep_re 56.81789954600272

{"global_step": 600000, "eval_re": [38.50012688383518, 183.58216742049663, 
36.914079169375455, 157.5980107828789, 29.03247760468147, 19.87547980801797, 
36.93697634552109, 20.32134034312632, 20.99907661713262, 24.419260484961516], 
"eval_len": [50, 114, 45, 96, 39, 35, 45, 31, 33, 35]}

 61%|██████    | 609999/1000000 [6:31:37<3:00:39, 35.98it/s]global step 610000, trans_decision ep_re 70.4727621189935

{"global_step": 610000, "eval_re": [20.976037891422745, 85.62957986274839, 
13.639648298755482, 15.59018435428009, 25.218003136041297, 287.1650647006361, 
22.08426342257509, 28.46283884773784, 183.81369476183067, 22.148305913907272], 
"eval_len": [36, 79, 27, 25, 33, 133, 33, 39, 112, 34]}

 62%|██████▏   | 619999/1000000 [6:37:51<2:56:23, 35.91it/s]global step 620000, trans_decision ep_re 165.58033486361686

{"global_step": 620000, "eval_re": [115.7938988312936, 195.1586268700756, 
20.653776407613456, 25.47818078296128, 61.695538273539015, 23.931575040571442, 
295.7518733854638, 93.89672518769434, 449.1384174601693, 374.30473639678667], 
"eval_len": [88, 110, 38, 36, 62, 36, 139, 82, 209, 168]}

 63%|██████▎   | 629999/1000000 [6:44:07<2:50:21, 36.20it/s]global step 630000, trans_decision ep_re 164.10247185357656

{"global_step": 630000, "eval_re": [19.910669181334036, 462.9654362249628, 
22.989450347139087, 23.03014614825176, 297.9171944443262, 72.4511820769558, 
87.95935538483117, 36.97970768724076, 131.96112596047982, 484.8604510802443], 
"eval_len": [37, 189, 33, 38, 173, 75, 80, 50, 91, 217]}

 64%|██████▍   | 639999/1000000 [6:50:22<2:46:58, 35.93it/s]global step 640000, trans_decision ep_re 136.78635197151067

{"global_step": 640000, "eval_re": [363.79473636818994, 25.353532779714055, 
25.1693861009954, 18.03700056359724, 37.738005700482525, 41.42683967981999, 
456.8070440657353, 12.35356457974742, 171.0094095661726, 216.17400031065245], 
"eval_len": [157, 38, 38, 28, 48, 50, 192, 30, 130, 137]}

 65%|██████▍   | 649999/1000000 [6:56:37<2:41:29, 36.12it/s]global step 650000, trans_decision ep_re 84.15110316504538

{"global_step": 650000, "eval_re": [14.180055862810264, 22.159633830836004, 
14.1865161608137, 192.3772120322559, 21.830001004052214, 22.222468286336532, 
282.2822287640452, 22.65742654622237, 59.90182452345936, 189.71366463962212], 
"eval_len": [26, 39, 27, 115, 34, 35, 146, 36, 68, 134]}

 66%|██████▌   | 659999/1000000 [7:02:52<2:36:24, 36.23it/s]global step 660000, trans_decision ep_re 53.2696966055618

{"global_step": 660000, "eval_re": [43.10544016325506, 22.438810630423404, 
53.6361854800103, 51.7449130825559, 19.47253462838642, 17.447293199993926, 
60.61228706259179, 177.28394436977976, 51.617424925801146, 35.338132512820266], 
"eval_len": [53, 41, 60, 53, 30, 32, 65, 137, 63, 51]}

 67%|██████▋   | 669999/1000000 [7:09:06<2:32:26, 36.08it/s]global step 670000, trans_decision ep_re 148.85325295758352

{"global_step": 670000, "eval_re": [293.37266567009556, 185.66470629656655, 
313.9416657658044, 20.352187449653513, 15.89619387753451, 28.35427121866924, 
231.1332038762463, 22.00546215678278, 316.02805230672317, 61.78412095775905], 
"eval_len": [163, 162, 207, 37, 29, 39, 183, 34, 175, 77]}

 68%|██████▊   | 679999/1000000 [7:15:22<2:27:59, 36.04it/s]global step 680000, trans_decision ep_re 76.34441594472261

{"global_step": 680000, "eval_re": [25.848349734281328, 61.8779246822536, 
18.23046465891835, 23.72414153586875, 492.7451731069747, 15.405338231382293, 
21.739377788420512, 18.68791104867356, 26.5574927108387, 58.62798594961426], 
"eval_len": [37, 68, 29, 37, 264, 26, 34, 36, 37, 61]}

 69%|██████▉   | 689999/1000000 [7:21:50<2:23:02, 36.12it/s]global step 690000, trans_decision ep_re 273.13432641426476

{"global_step": 690000, "eval_re": [606.1569463287325, 694.4232227410755, 
329.9876924749592, 144.388114161115, 246.70641430050395, 20.35432271234292, 
297.8368663480391, 214.94513992149402, 38.20559758820199, 138.33894756618298], 
"eval_len": [247, 239, 167, 119, 134, 36, 147, 127, 51, 122]}

 70%|██████▉   | 699996/1000000 [7:27:53<2:19:06, 35.94it/s]global step 700000, trans_decision ep_re 151.2993176953944

{"global_step": 700000, "eval_re": [19.796180397907587, 107.63229998389018, 
24.25424886101082, 27.245668087051115, 14.721637928709512, 40.45613667676192, 
290.50354378840836, 16.962625578015153, 712.9524875106317, 258.4683481415575], 
"eval_len": [30, 81, 38, 36, 28, 50, 164, 28, 260, 150]}

 71%|███████   | 709996/1000000 [7:34:08<2:14:04, 36.05it/s]global step 710000, trans_decision ep_re 58.13460612693716

{"global_step": 710000, "eval_re": [48.17823302504175, 37.89003241060486, 
133.81605705946268, 16.100174198589517, 21.551094829424237, 94.0437183234552, 
20.517675311536323, 24.737058416436078, 131.80283002528876, 52.70918766953221], 
"eval_len": [58, 48, 94, 31, 37, 84, 31, 39, 94, 56]}

 72%|███████▏  | 719996/1000000 [7:40:23<2:09:35, 36.01it/s]global step 720000, trans_decision ep_re 256.27823246724813

{"global_step": 720000, "eval_re": [566.529393966347, 101.18998629686571, 
380.70971084091644, 517.8255015380502, 21.4534496043809, 10.890113913585425, 
909.7054080797955, 15.760763988383042, 18.940340761693772, 19.77765568246337], 
"eval_len": [232, 79, 328, 202, 36, 21, 256, 29, 31, 31]}

 73%|███████▎  | 729996/1000000 [7:46:50<2:04:58, 36.01it/s]global step 730000, trans_decision ep_re 139.21738307239195

{"global_step": 730000, "eval_re": [49.64168312132962, 252.42219318263614, 
184.1036432302322, 129.29779513795884, 189.8190367934555, 73.32726415927739, 
29.43264444296312, 147.327286092826, 313.38900155767965, 23.413283005560885], 
"eval_len": [58, 151, 108, 95, 102, 76, 48, 94, 173, 37]}

 74%|███████▍  | 739996/1000000 [7:52:55<2:00:15, 36.04it/s]global step 740000, trans_decision ep_re 139.95797091044375

{"global_step": 740000, "eval_re": [36.05523637455034, 22.040564201648237, 
431.68012827208264, 303.52611740286693, 235.94899538826374, 261.2195489496059, 
52.069014834329664, 18.787519310399105, 21.11915114986499, 17.133433220825935], 
"eval_len": [56, 38, 202, 165, 130, 170, 60, 29, 36, 28]}

 75%|███████▍  | 749996/1000000 [7:59:11<1:55:49, 35.97it/s]global step 750000, trans_decision ep_re 129.78693004722567

{"global_step": 750000, "eval_re": [22.639169485020915, 81.55079523517232, 
19.15496505567414, 16.911909943764815, 283.81659322050484, 189.76985516287945, 
39.52345247011541, 611.3874478357689, 16.021852227223548, 17.093259836132283], 
"eval_len": [35, 76, 32, 35, 129, 100, 51, 249, 27, 36]}

 76%|███████▌  | 759996/1000000 [8:05:26<1:51:19, 35.93it/s]global step 760000, trans_decision ep_re 163.952149203399

{"global_step": 760000, "eval_re": [15.383519578246643, 26.770673052236, 
50.82916070046593, 148.99263071323026, 23.680621034754033, 19.298936427325796, 
390.51853708375336, 18.635413144703545, 653.7037315259163, 291.7082687733583], 
"eval_len": [28, 38, 58, 104, 38, 33, 180, 37, 288, 145]}

 77%|███████▋  | 769996/1000000 [8:11:43<1:45:55, 36.19it/s]global step 770000, trans_decision ep_re 62.285983353787856

{"global_step": 770000, "eval_re": [23.17557977462788, 145.0672697556126, 
17.24428748761227, 22.61151672100672, 20.315934127840478, 16.648093401307374, 
16.82623978724295, 13.200102913886221, 192.87924309529717, 154.8915664734448], 
"eval_len": [38, 102, 29, 33, 35, 28, 30, 24, 126, 116]}

 78%|███████▊  | 779996/1000000 [8:18:10<1:41:17, 36.20it/s]global step 780000, trans_decision ep_re 202.31013357348365

{"global_step": 780000, "eval_re": [712.5891745365515, 367.5381033490076, 
503.2667255606373, 126.41163635237776, 23.349346500020463, 21.095248467393862, 
24.734186724482335, 19.23950615712054, 202.58682743583572, 22.29058065140955], 
"eval_len": [254, 181, 202, 167, 38, 35, 36, 31, 321, 37]}

 79%|███████▉  | 789996/1000000 [8:24:15<1:37:16, 35.98it/s]global step 790000, trans_decision ep_re 102.08102045866417

{"global_step": 790000, "eval_re": [212.1658651004522, 29.13180167439837, 
301.38797301720894, 22.652327763602745, 85.10153681399365, 21.528305788946014, 
16.09654765390366, 21.466480522720953, 14.83111558233094, 296.4482506690842], 
"eval_len": [108, 38, 147, 34, 81, 36, 30, 34, 27, 168]}

 80%|███████▉  | 799996/1000000 [8:30:31<1:32:25, 36.07it/s]global step 800000, trans_decision ep_re 56.31525914407776

{"global_step": 800000, "eval_re": [21.52951196121684, 180.22973418517824, 
93.40628975302762, 24.412030296763103, 12.88145767459212, 23.478066133698345, 
21.596954598888175, 23.454619054233202, 119.4931914033363, 42.670736379843696], 
"eval_len": [34, 112, 81, 34, 23, 37, 34, 38, 125, 53]}

 81%|████████  | 809996/1000000 [8:36:46<1:27:54, 36.02it/s]global step 810000, trans_decision ep_re 154.31410987542876

{"global_step": 810000, "eval_re": [120.39759302322452, 124.72762625625671, 
26.720980909273102, 19.034215926000424, 116.11363777158567, 49.638313904136105, 
288.36202041712636, 277.17302868022745, 272.0079141178066, 248.96576774865068], 
"eval_len": [84, 91, 38, 36, 85, 60, 162, 192, 143, 147]}

 82%|████████▏ | 819996/1000000 [8:43:03<1:23:31, 35.92it/s]global step 820000, trans_decision ep_re 344.7281842867213

{"global_step": 820000, "eval_re": [661.7603081324078, 18.572753663739732, 
457.7185700585475, 20.427252215527723, 201.99133087278432, 72.56294644868223, 
540.8445094662867, 448.7873585366756, 22.91457312694283, 1001.7022403456187], 
"eval_len": [277, 32, 310, 32, 121, 81, 190, 180, 35, 296]}

 83%|████████▎ | 829996/1000000 [8:49:22<1:18:43, 35.99it/s]global step 830000, trans_decision ep_re 79.2344548288707

{"global_step": 830000, "eval_re": [16.17909253580027, 23.987376506131938, 
19.159946481010497, 168.6179075599484, 122.67566157711451, 21.24627948661128, 
102.67998183653974, 41.45923952269704, 227.0669721906638, 49.272090592189464], 
"eval_len": [34, 37, 32, 132, 88, 34, 87, 52, 140, 58]}

 84%|████████▍ | 839996/1000000 [8:55:50<1:14:13, 35.93it/s]global step 840000, trans_decision ep_re 181.826937135984

{"global_step": 840000, "eval_re": [772.4694124822639, 35.4316722657117, 
188.3803787941354, 144.59650730320092, 12.254644480149754, 536.6344605430074, 
20.914751535888676, 61.573125851487724, 27.819047809846396, 18.195370294148265],
"eval_len": [249, 47, 142, 93, 23, 199, 35, 65, 38, 35]}

 85%|████████▍ | 849996/1000000 [9:01:54<1:09:04, 36.19it/s]global step 850000, trans_decision ep_re 109.82089912141619

{"global_step": 850000, "eval_re": [20.3303455685574, 28.34934003209305, 
18.91130584301369, 32.83723739508876, 276.88079427409895, 56.841770271992516, 
62.73234520974157, 152.5159587326721, 217.21662134059375, 231.5932725463099], 
"eval_len": [33, 46, 28, 48, 158, 61, 79, 103, 133, 126]}

 86%|████████▌ | 859996/1000000 [9:08:10<1:05:13, 35.78it/s]global step 860000, trans_decision ep_re 134.97159415077374

{"global_step": 860000, "eval_re": [203.1195121152275, 160.41476974661845, 
25.428362040609183, 22.834121910604704, 27.305339308338663, 20.25504420885494, 
18.057371291993924, 375.6021823651409, 459.1763469633025, 37.52289155704669], 
"eval_len": [127, 151, 36, 38, 39, 33, 31, 237, 194, 49]}

 87%|████████▋ | 869996/1000000 [9:14:27<1:00:09, 36.02it/s]global step 870000, trans_decision ep_re 96.30639350538047

{"global_step": 870000, "eval_re": [27.989169209508077, 49.19993073180076, 
25.658814900373443, 24.279231299884934, 422.7957737605579, 21.972434794440208, 
27.777572660469357, 56.07686780811422, 264.6480860575245, 42.66605383113123], 
"eval_len": [37, 60, 36, 35, 163, 36, 37, 63, 143, 48]}

 88%|████████▊ | 879996/1000000 [9:20:42<55:24, 36.09it/s]global step 880000, trans_decision ep_re 92.95436422036185

{"global_step": 880000, "eval_re": [82.26797322990427, 73.81972402434755, 
58.97336956081798, 12.438587881132563, 104.85392809040437, 27.46929939747583, 
174.92210428214034, 213.84408681485706, 158.73074665794826, 22.22382226459025], 
"eval_len": [72, 72, 64, 28, 80, 39, 146, 113, 85, 35]}

 89%|████████▉ | 889996/1000000 [9:27:10<50:54, 36.01it/s]global step 890000, trans_decision ep_re 108.92963152695197

{"global_step": 890000, "eval_re": [22.68185882022632, 16.23037144340853, 
58.79204241446421, 57.30924489664415, 197.40024975489314, 135.75085370850877, 
355.898652231577, 24.903223353226494, 26.649423452136247, 193.6803951944348], 
"eval_len": [33, 28, 66, 62, 119, 93, 169, 37, 38, 153]}

 90%|████████▉ | 899996/1000000 [9:33:14<46:25, 35.91it/s]global step 900000, trans_decision ep_re 106.49590512247865

{"global_step": 900000, "eval_re": [31.419681638333472, 24.848992332963608, 
35.715255948264065, 507.6275069714671, 21.96940915265775, 59.04989154151857, 
56.421167782279106, 19.853343509285438, 246.87292101289577, 61.18088133512159], 
"eval_len": [49, 38, 48, 243, 34, 60, 62, 37, 152, 60]}

 91%|█████████ | 909996/1000000 [9:39:40<41:34, 36.09it/s]global step 910000, trans_decision ep_re 71.27919270664646

{"global_step": 910000, "eval_re": [20.99889079862481, 96.71487879852022, 
31.34644972678881, 40.8061854516342, 57.78584037653555, 27.226999647418, 
45.6543588821088, 305.0563691932403, 26.33154744863169, 60.87040674296215], 
"eval_len": [29, 86, 49, 53, 60, 37, 56, 153, 37, 60]}

 92%|█████████▏| 919996/1000000 [9:45:45<37:06, 35.94it/s]global step 920000, trans_decision ep_re 255.89334042042447

{"global_step": 920000, "eval_re": [55.82081023329242, 19.523911074467552, 
16.328628424315472, 122.60416939383221, 23.253755775032932, 344.78049527560444, 
696.5398008606596, 199.92021265506287, 505.48494077036474, 574.6766797416121], 
"eval_len": [62, 30, 27, 95, 35, 147, 239, 199, 198, 271]}

 93%|█████████▎| 929996/1000000 [9:52:04<32:20, 36.08it/s]global step 930000, trans_decision ep_re 78.34270761290858

{"global_step": 930000, "eval_re": [19.888793129576825, 70.43563927999722, 
264.94656348382995, 64.23388326868738, 54.41019701908688, 56.45843513772039, 
78.77608823139687, 52.836752442382576, 25.675720963071615, 95.76500317333615], 
"eval_len": [36, 74, 140, 65, 62, 64, 77, 59, 36, 82]}

 94%|█████████▍| 939996/1000000 [9:58:30<27:52, 35.88it/s]global step 940000, trans_decision ep_re 86.62128628378471

{"global_step": 940000, "eval_re": [98.14282880269381, 24.839755100163526, 
55.3600083260793, 20.608113962362104, 24.284408433933372, 369.3450176909119, 
19.496572527398005, 211.31098904278977, 18.83713607325102, 23.98803287826429], 
"eval_len": [82, 36, 60, 35, 33, 157, 37, 111, 33, 38]}

 95%|█████████▍| 949996/1000000 [10:08:15<23:05, 36.08it/s]global step 950000, trans_decision ep_re 79.25777835310234

{"global_step": 950000, "eval_re": [166.96606346129886, 123.61397433328858, 
20.421788328956115, 26.3826159085891, 138.3164137189926, 25.756929045617586, 
138.15295187298105, 14.030013640417438, 18.037994135654, 120.89903908522795], 
"eval_len": [188, 85, 37, 37, 140, 38, 168, 26, 28, 89]}

 96%|█████████▌| 959996/1000000 [10:14:31<18:31, 35.98it/s]global step 960000, trans_decision ep_re 145.04516973574374

{"global_step": 960000, "eval_re": [23.664919891039165, 197.51622459986186, 
61.28445094911554, 24.620881785062803, 198.20216850678278, 19.161413327610347, 
457.5912363166566, 16.24888924115184, 182.1171123435851, 270.0444003965716], 
"eval_len": [33, 125, 63, 34, 150, 32, 189, 28, 109, 154]}

 97%|█████████▋| 969997/1000000 [10:26:43<13:55, 35.91it/s]global step 970000, trans_decision ep_re 80.10664875468018

{"global_step": 970000, "eval_re": [24.201196790091334, 216.004580726144, 
253.50188914028567, 39.01833680404826, 45.72546017797974, 22.219553194787334, 
22.45926577334986, 104.85354267626647, 20.33005710603701, 52.75260515781214], 
"eval_len": [35, 135, 135, 48, 57, 34, 39, 86, 39, 61]}

 98%|█████████▊| 979999/1000000 [10:33:28<09:16, 35.96it/s]global step 980000, trans_decision ep_re 100.45690531823432

{"global_step": 980000, "eval_re": [24.96193635134212, 543.3554634287286, 
25.283740319432923, 20.93695836516351, 225.88907173492348, 22.071538387648282, 
65.58451767524078, 23.18230276920942, 33.3252806773224, 19.9782434733316], 
"eval_len": [35, 238, 36, 33, 116, 36, 66, 38, 45, 31]}

 99%|█████████▉| 989999/1000000 [10:39:44<04:36, 36.12it/s]global step 990000, trans_decision ep_re 206.72618572960647

{"global_step": 990000, "eval_re": [401.6180913467642, 17.087972151083175, 
306.4199027983827, 213.76573591631026, 25.758241234081122, 303.1910179917682, 
363.30406753761036, 181.36361881437765, 39.52347833613082, 215.22973116955632], 
"eval_len": [184, 30, 147, 143, 39, 148, 163, 101, 53, 145]}

100%|█████████▉| 999999/1000000 [10:46:01<00:00, 36.03it/s]global step 1000000, trans_decision ep_re 154.18104506039853

{"global_step": 1000000, "eval_re": [169.1211230577916, 216.6534153929334, 
747.8771769267149, 157.4487549406612, 27.573267774641355, 26.23411448094531, 
64.08529097388237, 50.72221956538923, 32.20033998731105, 49.89474750371495], 
"eval_len": [118, 152, 253, 116, 39, 39, 75, 61, 47, 58]}

100%|██████████| 1000000/1000000 [10:46:14<00:00, 25.79it/s]
