
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'ExtremeSparseL4U32::markov(4, 32, [[249, 1], [1, 31]])'
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [03:03<7:21:15, 37.39it/s]global step 10000, trans_decision ep_re 85.8281276418004

{"global_step": 10000, "eval_re": [102.99821887766164, 86.42730852172788, 
114.98132916553321, 103.47030386701347, 201.09224643952757, 34.67894804440314, 
60.74176291262728, 89.88795517728768, 36.10317711006167, 27.900026302160594], 
"eval_len": [77, 67, 84, 81, 124, 35, 51, 68, 40, 33]}

  2%|▏         | 19999/1000000 [09:01<7:25:50, 36.63it/s]global step 20000, trans_decision ep_re 42.69773850978863

{"global_step": 20000, "eval_re": [51.86862133260692, 29.226014305319286, 
44.83840191911609, 60.348984283920274, 31.364154121326802, 34.88139511713632, 
46.576835319985, 54.99743846179619, 26.12231185133086, 46.753228385348585], 
"eval_len": [49, 37, 46, 54, 38, 34, 42, 52, 33, 46]}

  3%|▎         | 29999/1000000 [15:10<7:16:13, 37.06it/s]global step 30000, trans_decision ep_re 148.848734647394

{"global_step": 30000, "eval_re": [304.8140653969127, 29.499311073150214, 
234.17978530761795, 140.640367122645, 118.528896691136, 112.40834518807475, 
93.0874528858379, 137.6637425258765, 180.41725931145135, 137.24812097123743], 
"eval_len": [203, 33, 183, 99, 82, 77, 80, 114, 120, 109]}

  4%|▍         | 39996/1000000 [20:56<7:10:36, 37.16it/s]global step 40000, trans_decision ep_re 73.4692574700371

{"global_step": 40000, "eval_re": [99.34494600884916, 54.956875634578445, 
241.43159194727525, 19.568020004146636, 35.36692394565561, 27.650242386241246, 
53.09596355628992, 32.94369903124633, 30.49711965931194, 139.83719252677653], 
"eval_len": [79, 52, 132, 25, 35, 31, 47, 35, 35, 88]}

  5%|▍         | 49996/1000000 [26:52<7:05:48, 37.18it/s]global step 50000, trans_decision ep_re 131.7987114274335

{"global_step": 50000, "eval_re": [35.73824522185151, 135.23083735810164, 
309.003909717023, 23.897646861147823, 89.01487091370466, 218.05840857436377, 
232.18628224210633, 123.44813245092345, 30.330565583704754, 121.0782153514079], 
"eval_len": [36, 97, 149, 29, 72, 112, 129, 79, 32, 85]}

  6%|▌         | 59996/1000000 [33:00<7:03:15, 37.01it/s]global step 60000, trans_decision ep_re 137.94656291177685

{"global_step": 60000, "eval_re": [213.26958751717535, 115.92226037665728, 
138.70555642631317, 114.88296325402459, 94.08314611398397, 117.0967001357114, 
197.41590107957035, 151.5585854144744, 25.11517515291872, 211.41575364693927], 
"eval_len": [133, 87, 97, 83, 68, 78, 119, 105, 31, 145]}

  7%|▋         | 69999/1000000 [38:50<7:00:03, 36.90it/s]global step 70000, trans_decision ep_re 94.6133016337325

{"global_step": 70000, "eval_re": [26.35296913404632, 125.38406480508985, 
28.89432534671612, 95.97065026726752, 224.56288590666546, 98.27495998451128, 
89.67100873617154, 81.76953564426499, 151.9475217700325, 23.30509474255946], 
"eval_len": [31, 91, 30, 71, 125, 73, 73, 75, 94, 29]}

  8%|▊         | 79999/1000000 [44:50<6:57:02, 36.77it/s]global step 80000, trans_decision ep_re 102.90481755895546

{"global_step": 80000, "eval_re": [165.07467679696208, 119.0035462242804, 
103.46257231492706, 184.47225034989845, 97.19784646877564, 41.542568001488306, 
100.61098465992826, 82.30346095073426, 105.01757903123014, 30.36269079133], 
"eval_len": [102, 79, 72, 103, 73, 44, 72, 66, 73, 35]}

  9%|▉         | 89999/1000000 [50:47<6:42:30, 37.68it/s]global step 90000, trans_decision ep_re 110.28547573960759

{"global_step": 90000, "eval_re": [127.8862777567912, 112.31362628971357, 
213.40159486785737, 148.67638773453777, 56.46812611277245, 106.66585510609008, 
166.5666397575736, 115.34134186384996, 28.59424452213476, 26.940663384755236], 
"eval_len": [87, 80, 114, 93, 58, 73, 97, 88, 33, 30]}

 10%|▉         | 99999/1000000 [56:41<6:43:09, 37.21it/s]global step 100000, trans_decision ep_re 190.9888001679006

{"global_step": 100000, "eval_re": [326.8834150149264, 181.06973880806686, 
190.62426126520614, 149.25991386613126, 154.36510213004598, 217.0347194569673, 
269.99673569928575, 159.23068603363959, 118.0285776889487, 143.39485171578798], 
"eval_len": [161, 111, 111, 99, 100, 124, 138, 100, 101, 93]}

 11%|█         | 109999/1000000 [1:02:38<6:32:38, 37.78it/s]global step 110000, trans_decision ep_re 89.21916069784604

{"global_step": 110000, "eval_re": [121.98510402966755, 82.887710620527, 
86.6684344996812, 102.26425254835848, 94.58827362504644, 99.37134057269914, 
138.23450783570343, 25.9750051723549, 51.871739731945624, 88.34523834247655], 
"eval_len": [87, 66, 64, 81, 73, 79, 91, 30, 45, 67]}

 12%|█▏        | 119999/1000000 [1:08:31<6:29:50, 37.62it/s]global step 120000, trans_decision ep_re 120.28595687176997

{"global_step": 120000, "eval_re": [31.701971503229938, 29.940022387945984, 
29.12466826372108, 101.48633441157942, 98.84317178832426, 85.11298564187511, 
456.6312178806803, 126.2172284880548, 93.15748933760061, 150.64447901468816], 
"eval_len": [36, 32, 32, 78, 72, 61, 196, 88, 72, 94]}

 13%|█▎        | 129999/1000000 [1:14:24<6:25:40, 37.60it/s]global step 130000, trans_decision ep_re 127.5569529261832

{"global_step": 130000, "eval_re": [60.4839936199365, 204.93737421022917, 
166.3193535229076, 203.9990073994861, 47.92739683238993, 179.19473212291703, 
28.289175116917573, 185.54078628388353, 84.35026186932456, 114.52744828383982], 
"eval_len": [57, 120, 108, 133, 53, 113, 29, 129, 67, 78]}

 14%|█▍        | 139998/1000000 [1:20:19<6:24:59, 37.23it/s]global step 140000, trans_decision ep_re 98.17989826398107

{"global_step": 140000, "eval_re": [107.09330152745864, 33.261978910319904, 
110.19065291264663, 33.969075239720574, 53.77879767580846, 51.86692676376081, 
383.21015885341177, 22.037027427079977, 134.3884138615519, 52.00264946805201], 
"eval_len": [72, 42, 79, 38, 50, 49, 179, 26, 97, 49]}

 15%|█▍        | 149998/1000000 [1:26:16<6:18:15, 37.45it/s]global step 150000, trans_decision ep_re 145.298462829299

{"global_step": 150000, "eval_re": [127.56866487848349, 113.90095076193927, 
109.9163031512884, 33.2200478196637, 324.655768938891, 100.95703396637157, 
187.24176243877844, 93.2446656790043, 116.85399394594961, 245.42543671262013], 
"eval_len": [85, 83, 85, 35, 174, 81, 116, 80, 81, 137]}

 16%|█▌        | 159997/1000000 [1:32:10<6:12:14, 37.61it/s]global step 160000, trans_decision ep_re 127.2458486003957

{"global_step": 160000, "eval_re": [88.37275944742039, 138.49762865767445, 
33.78727359624383, 34.08353922222331, 32.760757356879296, 401.1656019833916, 
148.64204998331434, 121.84411817918536, 134.4928424503613, 138.81191512726326], 
"eval_len": [65, 92, 36, 39, 35, 176, 96, 95, 91, 95]}

 17%|█▋        | 169997/1000000 [1:38:04<6:04:46, 37.92it/s]global step 170000, trans_decision ep_re 145.2127371133874

{"global_step": 170000, "eval_re": [104.70563834577207, 96.0791882973911, 
118.42488331151041, 135.695127189582, 123.31322021624516, 118.77825320996888, 
118.10609389621942, 310.6689424703186, 108.48983838392256, 217.866185812944], 
"eval_len": [86, 84, 86, 86, 83, 83, 83, 147, 78, 122]}

 18%|█▊        | 179997/1000000 [1:43:55<6:02:16, 37.73it/s]global step 180000, trans_decision ep_re 102.76596959552015

{"global_step": 180000, "eval_re": [101.68781475172925, 62.93138519422234, 
55.4078611731013, 58.78924789231865, 94.51836746898378, 193.14296260717987, 
203.60054062897146, 182.19257087567695, 24.83467006639526, 50.554275296622706], 
"eval_len": [80, 57, 55, 53, 73, 119, 113, 107, 28, 50]}

 19%|█▉        | 189996/1000000 [1:49:45<5:56:54, 37.82it/s]global step 190000, trans_decision ep_re 113.56600814807491

{"global_step": 190000, "eval_re": [63.360188619670105, 29.58878154807636, 
298.9260987942, 107.12264845124088, 292.4505311489431, 154.88368885005954, 
74.45034910165833, 43.42829874610703, 37.8871502811956, 33.56234593959813], 
"eval_len": [61, 39, 155, 80, 132, 87, 68, 46, 36, 41]}

 20%|█▉        | 199999/1000000 [1:55:42<5:53:05, 37.76it/s]global step 200000, trans_decision ep_re 165.29191004321595

{"global_step": 200000, "eval_re": [127.84914285736777, 129.59315795387843, 
188.68165398755298, 114.07577983124185, 184.9828673253772, 229.32307436680802, 
110.51794577169638, 291.71262829066916, 105.15642739173151, 171.0264226558363], 
"eval_len": [96, 91, 104, 86, 105, 118, 83, 147, 86, 101]}

 21%|██        | 209997/1000000 [2:01:35<5:49:40, 37.65it/s]global step 210000, trans_decision ep_re 193.86583577100538

{"global_step": 210000, "eval_re": [103.44062242769523, 491.67145707878643, 
100.7958087996411, 92.80105258028085, 227.93386478048424, 103.29957251882837, 
129.7708722769497, 297.89628492517653, 187.7006699457374, 203.34815237647388], 
"eval_len": [77, 230, 77, 71, 123, 77, 88, 146, 104, 115]}

 22%|██▏       | 219997/1000000 [2:07:27<5:46:57, 37.47it/s]global step 220000, trans_decision ep_re 235.02238810025932

{"global_step": 220000, "eval_re": [649.0475499013355, 46.29060261643134, 
47.46160100139103, 25.9174213173067, 370.8369650407385, 21.60759449176987, 
521.4322194927609, 58.76462078520508, 484.6729890134599, 124.19231734219453], 
"eval_len": [249, 50, 49, 31, 163, 26, 227, 56, 207, 80]}

 23%|██▎       | 229997/1000000 [2:13:30<5:41:05, 37.62it/s]global step 230000, trans_decision ep_re 222.895265668238

{"global_step": 230000, "eval_re": [815.1786382768139, 24.646347469514527, 
292.2073134592625, 127.94729420813675, 127.12002950675146, 137.7959115159131, 
215.15647942783008, 200.7643170723989, 81.78594188533928, 206.3503838604196], 
"eval_len": [366, 34, 138, 95, 91, 102, 112, 122, 58, 117]}

 24%|██▍       | 239996/1000000 [2:19:12<5:37:58, 37.48it/s]global step 240000, trans_decision ep_re 171.9657008479884

{"global_step": 240000, "eval_re": [200.6561829977682, 174.00302695929983, 
58.374700715419664, 147.89220169533755, 94.38484049415847, 315.20612259512615, 
190.37218800689342, 116.96089275411177, 171.9330928945583, 249.87375936721068], 
"eval_len": [122, 109, 62, 96, 70, 152, 116, 93, 113, 140]}

 25%|██▍       | 249999/1000000 [2:25:05<5:31:49, 37.67it/s]global step 250000, trans_decision ep_re 115.06424714677485

{"global_step": 250000, "eval_re": [158.62716147581958, 24.180900195197534, 
88.51809102400033, 118.6470626712233, 117.52870402415442, 119.00069866542528, 
170.64375592235857, 122.26381112959422, 134.92997063605526, 96.30231572391996], 
"eval_len": [100, 28, 80, 93, 92, 91, 104, 91, 85, 68]}

 26%|██▌       | 259999/1000000 [2:30:56<5:25:25, 37.90it/s]global step 260000, trans_decision ep_re 128.9340143325233

{"global_step": 260000, "eval_re": [113.4364289585482, 129.89055731292237, 
203.9524046371174, 118.9303629973407, 138.5448115146514, 146.45670592277816, 
134.8499559013817, 85.00426419321528, 129.72912942519898, 88.54552246207895], 
"eval_len": [90, 95, 115, 91, 96, 91, 93, 82, 91, 74]}

 27%|██▋       | 269999/1000000 [2:36:47<5:22:31, 37.72it/s]global step 270000, trans_decision ep_re 124.30543962791694

{"global_step": 270000, "eval_re": [125.85346692860563, 24.560097845260085, 
184.89313601502244, 60.31397856079267, 294.89520899487786, 44.088612426927014, 
89.47261178808104, 31.160902810216236, 168.7687143660029, 219.04766654338343], 
"eval_len": [95, 31, 105, 74, 141, 49, 68, 34, 100, 115]}

 28%|██▊       | 279999/1000000 [2:42:50<5:18:23, 37.69it/s]global step 280000, trans_decision ep_re 274.4976922738575

{"global_step": 280000, "eval_re": [90.24360173923525, 423.02100085947023, 
354.7083470443494, 814.6636253075877, 272.0606637849334, 26.733124085409315, 
344.1622312948777, 176.68558086352107, 115.19088452538135, 127.50786323381004], 
"eval_len": [69, 171, 176, 314, 132, 33, 150, 99, 83, 88]}

 29%|██▉       | 289996/1000000 [2:48:31<5:12:38, 37.85it/s]global step 290000, trans_decision ep_re 256.53942562591936

{"global_step": 290000, "eval_re": [127.77358852828968, 629.4947235612692, 
181.55455935812134, 347.2565964533171, 100.16615769052224, 711.4879254292854, 
200.61434067427166, 153.30802901734694, 83.08602087923063, 30.652314667539294], 
"eval_len": [100, 233, 108, 155, 92, 265, 112, 103, 81, 32]}

 30%|██▉       | 299999/1000000 [2:54:23<5:11:17, 37.48it/s]global step 300000, trans_decision ep_re 263.14909176238143

{"global_step": 300000, "eval_re": [114.5466370133055, 82.36464979307841, 
107.99230182529504, 454.2127446753664, 582.6966999318688, 814.4746712282023, 
239.62673052260885, 28.843793197912646, 91.69066249370745, 115.04202694246962], 
"eval_len": [83, 76, 85, 184, 221, 289, 131, 32, 63, 86]}

 31%|███       | 309999/1000000 [3:00:16<5:05:52, 37.60it/s]global step 310000, trans_decision ep_re 148.5859257177163

{"global_step": 310000, "eval_re": [185.96484006124595, 186.0427912965888, 
171.25913477428645, 174.2501367514692, 177.78032715163837, 212.31606616434823, 
60.28519816917934, 187.53480909951958, 68.04350959835574, 62.38244411053113], 
"eval_len": [106, 99, 102, 106, 101, 121, 60, 102, 61, 60]}

 32%|███▏      | 319998/1000000 [3:06:20<4:59:50, 37.80it/s]global step 320000, trans_decision ep_re 330.4417463375684

{"global_step": 320000, "eval_re": [471.60080950756117, 360.63003377244036, 
756.1561210925593, 123.44106932179172, 449.3225654913706, 54.07380533548066, 
765.6118698207099, 229.3095026743526, 50.813789545881434, 43.4578968135365], 
"eval_len": [197, 158, 272, 86, 194, 50, 269, 114, 58, 48]}

 33%|███▎      | 329998/1000000 [3:12:01<4:56:51, 37.62it/s]global step 330000, trans_decision ep_re 267.61275358930453

{"global_step": 330000, "eval_re": [382.42137034525905, 207.45142174212936, 
117.76744858301002, 479.0900169664637, 28.247271525360286, 243.54767874514638, 
373.85304266666014, 218.3819532067325, 444.1385424906882, 181.22878962159578], 
"eval_len": [170, 115, 91, 196, 33, 125, 171, 117, 187, 102]}

 34%|███▍      | 339997/1000000 [3:17:54<4:52:32, 37.60it/s]global step 340000, trans_decision ep_re 200.4055616951343

{"global_step": 340000, "eval_re": [260.2565151278493, 27.240581421093907, 
128.45395791218908, 352.9843421433712, 267.7992814765566, 243.87224147533684, 
26.429204688498416, 466.05775858490097, 205.17894906528213, 25.782785056264807],
"eval_len": [132, 34, 89, 146, 147, 135, 29, 184, 112, 31]}

 35%|███▍      | 349997/1000000 [3:23:46<4:48:18, 37.58it/s]global step 350000, trans_decision ep_re 165.6369324065157

{"global_step": 350000, "eval_re": [148.9721485621997, 194.48360766499857, 
144.59985849482138, 198.94321546073886, 187.50380872164988, 175.96324392700842, 
91.86839178641317, 136.7275144098427, 179.02680940656143, 198.28072563092311], 
"eval_len": [108, 119, 104, 112, 108, 105, 86, 98, 98, 114]}

 36%|███▌      | 359997/1000000 [3:29:50<4:45:42, 37.33it/s]global step 360000, trans_decision ep_re 301.4135874432763

{"global_step": 360000, "eval_re": [238.7352436162914, 438.8196043676697, 
83.8473961692965, 440.89265828524697, 468.56165966152736, 193.60306017261559, 
26.959275384874825, 323.1179577335919, 712.304975086996, 87.29404395465316], 
"eval_len": [147, 185, 79, 184, 188, 111, 30, 152, 267, 77]}

 37%|███▋      | 369999/1000000 [3:35:32<4:38:18, 37.73it/s]global step 370000, trans_decision ep_re 204.9307039591025

{"global_step": 370000, "eval_re": [464.9387083709226, 58.79697461999108, 
35.379680151687175, 69.30917027430564, 36.10283975735795, 21.023464009634623, 
169.31886536858443, 386.50481112974137, 47.73574679878359, 760.1967791100163], 
"eval_len": [189, 58, 44, 63, 49, 25, 95, 171, 48, 284]}

 38%|███▊      | 379999/1000000 [3:41:24<4:34:47, 37.61it/s]global step 380000, trans_decision ep_re 159.01059576743012

{"global_step": 380000, "eval_re": [46.89487419984839, 188.99384171488035, 
36.011079919260204, 333.4839578111451, 190.41681413729302, 25.952824283680908, 
63.674386913868325, 334.1273530897117, 185.89869592437736, 184.65212968023584], 
"eval_len": [48, 108, 34, 151, 107, 29, 62, 153, 107, 105]}

 39%|███▉      | 389999/1000000 [3:47:16<4:29:16, 37.76it/s]global step 390000, trans_decision ep_re 70.95845521857369

{"global_step": 390000, "eval_re": [38.91581928603619, 31.614177830337248, 
32.66798659864407, 42.91552967514134, 45.14325043352316, 47.312014450409976, 
180.37072985576754, 185.65842622274303, 47.50356857481249, 57.483049258321934], 
"eval_len": [48, 37, 41, 45, 54, 51, 110, 108, 49, 54]}

 40%|███▉      | 399998/1000000 [3:53:06<4:24:19, 37.83it/s]global step 400000, trans_decision ep_re 140.34562615774087

{"global_step": 400000, "eval_re": [308.2928865489667, 209.89359011346176, 
224.43852831908197, 225.46432111646675, 34.42412013544863, 37.79427464141812, 
33.131514694470106, 46.82131533656825, 252.19963162545483, 30.996079046071564], 
"eval_len": [144, 112, 115, 124, 37, 45, 38, 42, 120, 40]}

 41%|████      | 409998/1000000 [3:58:57<4:21:19, 37.63it/s]global step 410000, trans_decision ep_re 60.95655250467871

{"global_step": 410000, "eval_re": [194.43523981646211, 33.96399308721284, 
25.28561709907003, 34.26509203261986, 133.7018530917054, 43.39867784410928, 
31.924642336249136, 51.96211872756659, 30.476090990219937, 30.15220002157182], 
"eval_len": [108, 38, 35, 40, 100, 47, 37, 53, 36, 32]}

 42%|████▏     | 419998/1000000 [4:04:47<4:18:35, 37.38it/s]global step 420000, trans_decision ep_re 124.64539086535767

{"global_step": 420000, "eval_re": [53.190588052025994, 210.64762167947885, 
45.008104952796145, 37.698988390641375, 40.06902232377441, 51.29918418375838, 
37.42141150677034, 80.87102159665878, 33.75185328344093, 656.4961126842315], 
"eval_len": [50, 109, 50, 43, 47, 50, 44, 65, 33, 224]}

 43%|████▎     | 429998/1000000 [4:10:37<4:11:34, 37.76it/s]global step 430000, trans_decision ep_re 63.92491032688008

{"global_step": 430000, "eval_re": [30.392301260527663, 154.44343981452235, 
32.16993672247855, 49.47006226108139, 30.18471755102453, 81.6758172598572, 
50.44204003024686, 84.18176074425078, 84.97122778138254, 41.317799843428894], 
"eval_len": [34, 90, 34, 52, 31, 72, 51, 72, 72, 50]}

 44%|████▍     | 439998/1000000 [4:16:28<4:06:03, 37.93it/s]global step 440000, trans_decision ep_re 152.858225908551

{"global_step": 440000, "eval_re": [413.8435046184504, 31.920172064316503, 
131.26882532653678, 33.90209273587254, 142.10934374515233, 140.117700074672, 
182.39884723892987, 50.990557283973644, 261.3525414139134, 140.6786745836923], 
"eval_len": [192, 34, 83, 36, 88, 96, 112, 47, 134, 92]}

 45%|████▍     | 449997/1000000 [4:22:20<4:04:22, 37.51it/s]global step 450000, trans_decision ep_re 51.10023623012324

{"global_step": 450000, "eval_re": [43.842150633331, 32.22321350881395, 
35.56422317119854, 82.77763393551908, 35.621522638753774, 57.915729676243636, 
88.06732604019166, 27.129736761266724, 79.05028829547967, 28.810537640434383], 
"eval_len": [48, 36, 42, 70, 40, 58, 74, 35, 70, 36]}

 46%|████▌     | 459997/1000000 [4:28:15<3:59:00, 37.66it/s]global step 460000, trans_decision ep_re 217.3586577590733

{"global_step": 460000, "eval_re": [61.345194497036694, 446.9214036028519, 
762.6242721389877, 23.252619995988358, 477.5170552242182, 79.3397385186236, 
23.917995383147918, 186.2128529770066, 31.574040035489677, 80.88140521738234], 
"eval_len": [56, 185, 276, 29, 191, 69, 27, 115, 35, 72]}

 47%|████▋     | 469997/1000000 [4:34:09<3:59:44, 36.84it/s]global step 470000, trans_decision ep_re 129.76078099729443

{"global_step": 470000, "eval_re": [26.897299706651772, 57.866292640128464, 
34.09670093754489, 484.0929093563508, 25.705260159525945, 65.05253450700066, 
33.16962779903333, 53.46088806014812, 233.95066543354247, 283.3156313730178], 
"eval_len": [34, 58, 44, 187, 27, 62, 33, 59, 131, 137]}

 48%|████▊     | 479997/1000000 [4:40:10<3:59:20, 36.21it/s]global step 480000, trans_decision ep_re 147.21406867375686

{"global_step": 480000, "eval_re": [346.1666488703477, 30.81656425156701, 
47.014709152387404, 71.87376197311815, 324.0374000531919, 163.22724083055346, 
178.4655660612882, 50.90911622303104, 182.04752368339427, 77.58215563868939], 
"eval_len": [151, 33, 51, 71, 147, 95, 98, 50, 98, 76]}

 49%|████▉     | 489996/1000000 [4:45:52<3:50:50, 36.82it/s]global step 490000, trans_decision ep_re 160.43589352255623

{"global_step": 490000, "eval_re": [57.20141395784958, 57.425491452757285, 
215.95380128141016, 23.676019448004432, 201.31256959057472, 403.03447019470826, 
123.27178442853909, 465.55323515782857, 34.15145855085424, 22.778691163035724], 
"eval_len": [53, 52, 116, 27, 107, 178, 89, 184, 34, 25]}

 50%|████▉     | 499996/1000000 [4:51:43<3:47:37, 36.61it/s]global step 500000, trans_decision ep_re 31.24901342620975

{"global_step": 500000, "eval_re": [27.46773860761111, 36.80514128525935, 
27.405156032761546, 34.57064235510462, 19.84932243078251, 35.70567587167848, 
30.711312200599295, 31.136590255485626, 29.295105769450288, 39.54344945336468], 
"eval_len": [37, 42, 35, 44, 25, 44, 36, 34, 37, 46]}

 51%|█████     | 509996/1000000 [4:57:33<3:41:59, 36.79it/s]global step 510000, trans_decision ep_re 155.86317020816995

{"global_step": 510000, "eval_re": [30.9337386713651, 503.84687030786745, 
44.3696410315642, 332.5822532310708, 53.85738718016188, 439.06055672847117, 
44.02410068107413, 39.32901210776968, 26.177160475238413, 44.45098166711661], 
"eval_len": [33, 197, 46, 149, 60, 177, 47, 43, 29, 48]}

 52%|█████▏    | 519996/1000000 [5:03:24<3:38:36, 36.60it/s]global step 520000, trans_decision ep_re 136.6485314512908

{"global_step": 520000, "eval_re": [232.1116412272182, 217.2361080127197, 
281.6781184525574, 30.550602845058812, 335.94895231745807, 68.05537468736146, 
90.5917371442827, 42.657437443181074, 39.68256438613739, 27.972777996933512], 
"eval_len": [126, 113, 150, 34, 158, 67, 78, 44, 38, 36]}

 53%|█████▎    | 529996/1000000 [5:09:15<3:34:10, 36.58it/s]global step 530000, trans_decision ep_re 85.57018943287646

{"global_step": 530000, "eval_re": [41.62777074343911, 24.285824258612855, 
258.02621523374313, 36.05755497668843, 29.024782695026452, 29.23892034981991, 
37.065585543646264, 31.907946460877376, 193.74605097203772, 174.72124309487344],
"eval_len": [46, 27, 129, 44, 36, 38, 48, 39, 111, 100]}

 54%|█████▍    | 539999/1000000 [5:15:06<3:29:41, 36.56it/s]global step 540000, trans_decision ep_re 82.92206733860647

{"global_step": 540000, "eval_re": [36.122669097005115, 58.07286320059733, 
49.10222347404556, 41.773018042279055, 144.57447664693407, 177.49951715660907, 
199.07574883618813, 40.20290972177591, 31.981126201096686, 50.816121009533894], 
"eval_len": [41, 55, 49, 44, 92, 103, 110, 49, 39, 54]}

 55%|█████▍    | 549998/1000000 [5:20:58<3:23:16, 36.90it/s]global step 550000, trans_decision ep_re 192.3576964743778

{"global_step": 550000, "eval_re": [56.218140108330225, 32.98508831748867, 
39.72266647664254, 42.928401526494795, 305.40933217290075, 200.68851389899123, 
46.95210628664237, 725.7537406848762, 32.157924391347514, 440.7610508800638], 
"eval_len": [54, 42, 42, 45, 137, 113, 47, 260, 40, 187]}

 56%|█████▌    | 559997/1000000 [5:27:00<3:18:49, 36.88it/s]global step 560000, trans_decision ep_re 161.13176952184108

{"global_step": 560000, "eval_re": [305.19382186455704, 239.1840722168857, 
299.91196950145, 25.37180401915486, 33.70521891918933, 22.91108572996682, 
340.13599284456086, 119.56511034471795, 26.432028243275784, 198.90659153465245],
"eval_len": [143, 126, 133, 28, 40, 26, 158, 81, 27, 112]}

 57%|█████▋    | 569996/1000000 [5:32:42<3:15:46, 36.61it/s]global step 570000, trans_decision ep_re 119.97866792850041

{"global_step": 570000, "eval_re": [28.306390386705424, 316.1835385125433, 
168.71792084699558, 36.07020528491039, 30.635021941401675, 142.17967770186365, 
108.47544281970852, 294.19892077716867, 39.30028479546797, 35.71927621823905], 
"eval_len": [40, 146, 98, 40, 40, 84, 79, 141, 42, 39]}

 58%|█████▊    | 579996/1000000 [5:38:33<3:10:55, 36.66it/s]global step 580000, trans_decision ep_re 80.70937525982862

{"global_step": 580000, "eval_re": [38.6180408591966, 235.8275610401245, 
45.73186447802146, 39.31112573361894, 25.550268077375495, 28.068527588360983, 
36.74581464509414, 31.923691583877048, 113.26951593143443, 212.04734266118257], 
"eval_len": [43, 117, 48, 40, 29, 32, 38, 41, 77, 113]}

 59%|█████▉    | 589998/1000000 [5:44:24<3:04:25, 37.05it/s]global step 590000, trans_decision ep_re 116.8778983102205

{"global_step": 590000, "eval_re": [45.9102278911607, 47.306940300770655, 
54.04878603698867, 59.706186765362496, 208.26620177256586, 34.45661794813854, 
29.589865779943096, 24.225482870511474, 609.1855267903118, 56.083146946451826], 
"eval_len": [49, 46, 55, 54, 107, 37, 36, 28, 214, 51]}

 60%|█████▉    | 599996/1000000 [5:50:16<2:57:00, 37.66it/s]global step 600000, trans_decision ep_re 72.79795993571794

{"global_step": 600000, "eval_re": [23.92399682975904, 125.04054035189566, 
32.24159522461835, 24.137965373988102, 147.47611867176371, 32.55592177882538, 
37.6375076461483, 48.57265129742913, 225.7044680892169, 30.68883409353491], 
"eval_len": [27, 72, 34, 29, 89, 35, 41, 48, 117, 41]}

 61%|██████    | 609998/1000000 [5:56:08<2:52:25, 37.70it/s]global step 610000, trans_decision ep_re 80.05664899517608

{"global_step": 610000, "eval_re": [194.53155541512714, 33.47034223774919, 
34.06442215261089, 37.80231072153243, 141.89505817476066, 162.7608235433587, 
33.0895177157602, 33.76161553353703, 50.12450175023702, 79.06634270708754], 
"eval_len": [102, 37, 38, 38, 102, 99, 37, 38, 52, 68]}

 62%|██████▏   | 619999/1000000 [6:02:10<2:48:16, 37.64it/s]global step 620000, trans_decision ep_re 160.95407811379246

{"global_step": 620000, "eval_re": [38.17055826234552, 243.29730003802814, 
151.15880578641242, 126.75744809491533, 257.524022127976, 36.03258782522553, 
548.2718909151995, 36.28099903254796, 139.2925702102771, 32.75459884499696], 
"eval_len": [41, 113, 92, 83, 137, 40, 218, 37, 91, 37]}

 63%|██████▎   | 629999/1000000 [6:07:52<2:44:02, 37.59it/s]global step 630000, trans_decision ep_re 34.048883405737264

{"global_step": 630000, "eval_re": [35.556710241863854, 33.22508453569904, 
33.932722369968126, 31.156546765659154, 31.482737503947973, 37.32655197142303, 
30.79903540515354, 35.1804148852795, 31.678629765189775, 40.150400613188644], 
"eval_len": [41, 36, 38, 37, 42, 38, 38, 39, 38, 41]}

 64%|██████▍   | 639998/1000000 [6:13:43<2:39:09, 37.70it/s]global step 640000, trans_decision ep_re 42.4561794112198

{"global_step": 640000, "eval_re": [32.37256264007876, 37.12148211904894, 
33.96694774017619, 61.43575782470158, 41.357032834992005, 40.71637184270923, 
34.70358620857924, 51.10955481592481, 47.170582825547655, 44.60791526043959], 
"eval_len": [38, 39, 35, 58, 46, 46, 39, 49, 50, 48]}

 65%|██████▍   | 649996/1000000 [6:19:33<2:35:19, 37.56it/s]global step 650000, trans_decision ep_re 100.7510546754983

{"global_step": 650000, "eval_re": [326.0246691749739, 40.368466061918674, 
28.15837022613377, 236.63007827661716, 34.68656987522134, 172.82111053881619, 
45.55310116809221, 39.10061537160549, 52.206895915285884, 31.960670146318428], 
"eval_len": [153, 44, 35, 121, 39, 102, 48, 42, 49, 36]}

 66%|██████▌   | 659999/1000000 [6:25:25<2:30:47, 37.58it/s]global step 660000, trans_decision ep_re 52.69401943855161

{"global_step": 660000, "eval_re": [37.7241805222376, 122.45616561743995, 
124.05287272584883, 36.0760565436615, 32.67401278485063, 37.50410471979169, 
37.294380213663054, 30.62720141169267, 34.20210674055575, 34.329113105774454], 
"eval_len": [40, 80, 87, 35, 41, 41, 39, 37, 35, 42]}

 67%|██████▋   | 669999/1000000 [6:31:15<2:25:51, 37.71it/s]global step 670000, trans_decision ep_re 35.14366587233452

{"global_step": 670000, "eval_re": [42.2039480893959, 29.74334562638531, 
36.327897282136604, 28.736318179893544, 36.9250291210132, 40.03777362152289, 
29.18015913104495, 34.05243416978685, 33.86690989434604, 40.36284360781985], 
"eval_len": [39, 35, 40, 36, 45, 40, 37, 39, 37, 41]}

 68%|██████▊   | 679997/1000000 [6:37:05<2:21:16, 37.75it/s]global step 680000, trans_decision ep_re 272.2420079644619

{"global_step": 680000, "eval_re": [49.03485965012997, 784.5880858294429, 
1143.345211564385, 36.27823988957831, 32.79262654680643, 166.57995939490155, 
45.20302768419063, 29.645359830142265, 41.33691230183374, 393.6157969532079], 
"eval_len": [47, 276, 366, 42, 40, 99, 51, 41, 45, 163]}

 69%|██████▉   | 689999/1000000 [6:42:58<2:17:04, 37.69it/s]global step 690000, trans_decision ep_re 85.41731127151466

{"global_step": 690000, "eval_re": [252.64846011684804, 152.194766738917, 
42.269081007347964, 46.225162219027496, 28.898936767212742, 177.35850703478775, 
35.467947320924985, 42.66748030786309, 33.70833933029844, 42.73443187191914], 
"eval_len": [127, 92, 45, 47, 34, 97, 45, 47, 42, 49]}

 70%|██████▉   | 699998/1000000 [6:48:49<2:12:29, 37.74it/s]global step 700000, trans_decision ep_re 91.01366425250788

{"global_step": 700000, "eval_re": [31.25839024399463, 58.21271358879439, 
563.5226095315827, 42.6227812277991, 37.82670575382288, 30.715717350027745, 
35.81040134543088, 48.00724544626546, 26.289251419336146, 35.87082661802491], 
"eval_len": [36, 59, 208, 48, 44, 35, 38, 49, 33, 36]}

 71%|███████   | 709998/1000000 [6:54:39<2:08:37, 37.58it/s]global step 710000, trans_decision ep_re 58.069200150873186

{"global_step": 710000, "eval_re": [26.685704090514257, 24.26578319376122, 
201.20240988728958, 71.07769052064518, 39.546204729896175, 27.185478209587338, 
45.57221804344314, 38.67651548790406, 73.26753975881289, 33.2124575868781], 
"eval_len": [32, 27, 114, 63, 45, 30, 50, 44, 62, 37]}

 72%|███████▏  | 719998/1000000 [7:00:29<2:03:50, 37.68it/s]global step 720000, trans_decision ep_re 68.45650222946337

{"global_step": 720000, "eval_re": [32.478505940776124, 31.637914882522303, 
27.66535840903067, 50.477972975680146, 30.269471168024687, 38.304135042815425, 
184.75069024357714, 29.291200805810735, 36.53897405237135, 223.15079877402505], 
"eval_len": [37, 37, 30, 54, 37, 40, 99, 36, 40, 125]}

 73%|███████▎  | 729998/1000000 [7:06:20<1:58:41, 37.92it/s]global step 730000, trans_decision ep_re 76.57400867368385

{"global_step": 730000, "eval_re": [26.156606632178253, 38.31546921254949, 
30.285734273194908, 41.71428351074877, 38.1332076214272, 38.78245137389437, 
20.219306836326805, 75.07110382608045, 221.4523479888612, 235.6095754615771], 
"eval_len": [28, 40, 41, 42, 41, 44, 24, 65, 113, 134]}

 74%|███████▍  | 739997/1000000 [7:12:12<1:54:41, 37.78it/s]global step 740000, trans_decision ep_re 207.6516768712642

{"global_step": 740000, "eval_re": [120.37978466215877, 860.4206155704901, 
60.20309181112968, 295.9932000773267, 142.4878840895204, 59.36497887570504, 
54.368026719964945, 60.030726552291725, 255.49927137040694, 167.76918898364758],
"eval_len": [82, 286, 58, 130, 88, 57, 58, 59, 125, 101]}

 75%|███████▍  | 749997/1000000 [7:18:04<1:50:18, 37.77it/s]global step 750000, trans_decision ep_re 199.4291537298661

{"global_step": 750000, "eval_re": [39.94677813641755, 139.6709768393701, 
58.79025301809758, 39.25555280643527, 735.6163451775226, 40.49744226291074, 
34.92491702226471, 262.2818730465481, 449.4650938152884, 193.8423051738061], 
"eval_len": [40, 92, 56, 48, 251, 47, 45, 131, 175, 109]}

 76%|███████▌  | 759997/1000000 [7:23:56<1:46:14, 37.65it/s]global step 760000, trans_decision ep_re 84.77154770349287

{"global_step": 760000, "eval_re": [49.479057402151824, 41.70524300088318, 
38.1203052002791, 72.00293290128822, 41.53854700028438, 59.646830569559874, 
447.32661233952905, 42.04669110075343, 32.62269053304888, 23.22656698715071], 
"eval_len": [50, 49, 43, 65, 42, 59, 172, 45, 42, 26]}

 77%|███████▋  | 769996/1000000 [7:29:47<1:41:56, 37.60it/s]global step 770000, trans_decision ep_re 160.33788686390287

{"global_step": 770000, "eval_re": [31.10142274134246, 127.1519622355689, 
215.6774404442533, 187.2150140293383, 223.69401349421847, 358.175595149112, 
381.0391829942062, 24.902612459900407, 34.064735744596376, 20.356889346492146], 
"eval_len": [39, 88, 111, 108, 117, 156, 171, 28, 41, 24]}

 78%|███████▊  | 779996/1000000 [7:35:38<1:37:12, 37.72it/s]global step 780000, trans_decision ep_re 43.746407086602815

{"global_step": 780000, "eval_re": [31.859005425739028, 44.38465885529002, 
57.17652535669321, 50.457291296130144, 35.318716937372926, 30.8740042477521, 
31.844836368297095, 35.47305027235903, 40.6863239196625, 79.38965818673205], 
"eval_len": [40, 44, 54, 52, 40, 35, 42, 41, 43, 65]}

 79%|███████▉  | 789999/1000000 [7:41:28<1:32:44, 37.74it/s]global step 790000, trans_decision ep_re 139.4110281341039

{"global_step": 790000, "eval_re": [246.4020789391176, 100.19982610556465, 
35.40030826285928, 98.59415492519548, 137.53752280291914, 192.25003191721427, 
51.30974939307637, 152.65632115272496, 156.7651987702394, 222.99508907212808], 
"eval_len": [123, 69, 42, 70, 96, 112, 51, 95, 98, 115]}

 80%|███████▉  | 799999/1000000 [7:48:40<1:28:41, 37.58it/s]global step 800000, trans_decision ep_re 113.98734819652145

{"global_step": 800000, "eval_re": [40.60250458076421, 26.331233137036065, 
38.45039179291802, 168.48856222619509, 641.7716882877246, 39.5106936503446, 
42.212204130516916, 29.349786819377524, 72.91679623265692, 40.23962110768075], 
"eval_len": [46, 31, 42, 104, 237, 39, 47, 41, 65, 44]}

 81%|████████  | 809999/1000000 [7:54:31<1:23:35, 37.88it/s]global step 810000, trans_decision ep_re 70.60022540643487

{"global_step": 810000, "eval_re": [72.62012795838608, 59.37394039727295, 
74.74070923747794, 71.78981588193587, 75.8554324837113, 82.02854903086374, 
73.73396753918446, 73.02672495723739, 62.54982111311282, 60.28316546516613], 
"eval_len": [67, 64, 69, 68, 65, 68, 67, 69, 65, 65]}

 82%|████████▏ | 819998/1000000 [8:00:22<1:19:39, 37.66it/s]global step 820000, trans_decision ep_re 141.23767874199243

{"global_step": 820000, "eval_re": [39.14925850835064, 165.3851400618185, 
252.98765118163107, 319.4357727868138, 128.04984513907445, 141.2530098582335, 
165.45851814031926, 29.087650167697696, 46.981267731969574, 124.58867384401576],
"eval_len": [42, 94, 125, 151, 84, 96, 92, 33, 50, 87]}

 83%|████████▎ | 829997/1000000 [8:06:23<1:15:09, 37.70it/s]global step 830000, trans_decision ep_re 84.53570546190817

{"global_step": 830000, "eval_re": [56.136751179813466, 44.599671673953644, 
224.62259023907376, 31.523234011322334, 195.69987543624245, 131.42784253672076, 
38.07265281389005, 58.244559879670554, 28.383169941993813, 36.64670690640071], 
"eval_len": [54, 47, 113, 37, 104, 90, 41, 55, 40, 43]}

 84%|████████▍ | 839996/1000000 [8:12:04<1:11:23, 37.35it/s]global step 840000, trans_decision ep_re 59.90769047290322

{"global_step": 840000, "eval_re": [33.95785092753109, 67.83433350792293, 
54.15465657807792, 49.16819052632092, 44.890564101515615, 45.40455438417489, 
36.590852496173035, 167.02846728264115, 45.05625740934392, 54.99117751533074], 
"eval_len": [35, 67, 52, 51, 50, 47, 42, 95, 47, 52]}

 85%|████████▍ | 849999/1000000 [8:17:54<1:06:22, 37.66it/s]global step 850000, trans_decision ep_re 101.01698736312412

{"global_step": 850000, "eval_re": [27.77915530651386, 30.938284263504485, 
39.65276541864687, 250.33642604702965, 45.64346161553916, 44.37023610274885, 
130.3991881218796, 39.73432333469709, 368.8038400103022, 32.51219341037952], 
"eval_len": [30, 42, 46, 126, 49, 47, 88, 44, 161, 34]}

 86%|████████▌ | 859999/1000000 [8:23:45<1:03:32, 36.72it/s]global step 860000, trans_decision ep_re 69.50352248623514

{"global_step": 860000, "eval_re": [45.600116731577984, 133.6805384071017, 
18.260210573098014, 47.07678749726621, 30.59308547054364, 26.984499452763554, 
118.32752860047002, 115.87756527014054, 115.4608485958343, 43.174044263555565], 
"eval_len": [47, 92, 22, 51, 36, 32, 76, 77, 85, 44]}

 87%|████████▋ | 869999/1000000 [8:29:36<59:03, 36.69it/s]global step 870000, trans_decision ep_re 140.43855157936105

{"global_step": 870000, "eval_re": [189.3497524715595, 147.83087489503913, 
45.094414142312516, 37.13719667387926, 189.68666517257174, 322.4568087485276, 
31.35339689521842, 118.96780360425124, 280.14822501514567, 42.3603781751054], 
"eval_len": [108, 93, 44, 44, 110, 151, 38, 82, 130, 44]}

 88%|████████▊ | 879998/1000000 [8:35:27<54:27, 36.73it/s]global step 880000, trans_decision ep_re 58.91059127633663

{"global_step": 880000, "eval_re": [39.22413997603225, 37.083443077076396, 
42.525835808214204, 52.79282436748667, 40.609792296775986, 51.364035382291505, 
72.63639718548926, 181.84542822578243, 35.80904762849291, 35.21496881572476], 
"eval_len": [48, 40, 46, 52, 37, 54, 65, 106, 40, 43]}

 89%|████████▉ | 889998/1000000 [8:41:17<49:44, 36.86it/s]global step 890000, trans_decision ep_re 138.20787442386302

{"global_step": 890000, "eval_re": [647.9215150315475, 30.516043503653364, 
347.5692345055435, 34.789799488628695, 33.057539554045235, 35.33383798895278, 
40.37157004954524, 43.528379798207396, 35.670003397317906, 133.32082092118839], 
"eval_len": [243, 32, 157, 35, 42, 43, 42, 48, 34, 90]}

 90%|████████▉ | 899998/1000000 [8:47:08<45:27, 36.67it/s]global step 900000, trans_decision ep_re 199.6158317864868

{"global_step": 900000, "eval_re": [47.238597032897424, 451.10661725525904, 
30.30350906737188, 772.5289342858748, 49.24472572450841, 55.289044428284384, 
454.50910510486335, 32.88607223457652, 52.72084575198955, 50.330866979242415], 
"eval_len": [48, 185, 39, 266, 53, 52, 182, 42, 53, 51]}

 91%|█████████ | 909997/1000000 [8:53:00<40:43, 36.84it/s]global step 910000, trans_decision ep_re 104.10539676614223

{"global_step": 910000, "eval_re": [69.74627908730173, 29.44508163310585, 
387.1224772313311, 44.31938027196893, 280.83977688619404, 29.332525879187735, 
34.7493110067135, 102.36898957075806, 30.337071101553107, 32.793074993308295], 
"eval_len": [64, 32, 167, 48, 135, 34, 35, 78, 37, 34]}

 92%|█████████▏| 919997/1000000 [8:58:50<36:25, 36.60it/s]global step 920000, trans_decision ep_re 91.714184812279

{"global_step": 920000, "eval_re": [37.710151853284835, 38.374829238211746, 
40.06683635595893, 30.262797093609514, 33.36158357080776, 300.2384312206496, 
35.60678971915119, 36.69395962204746, 344.51210094539437, 20.314368503674675], 
"eval_len": [43, 45, 46, 33, 38, 139, 36, 37, 160, 23]}

 93%|█████████▎| 929997/1000000 [9:04:41<31:42, 36.79it/s]global step 930000, trans_decision ep_re 109.41130253011927

{"global_step": 930000, "eval_re": [27.185212337782428, 33.28178239522788, 
43.79439720943687, 38.14975088468811, 30.661877480682058, 37.90902830537766, 
236.17273058565482, 411.3280119503004, 35.77627204619425, 199.8539621058483], 
"eval_len": [32, 37, 44, 43, 33, 43, 121, 163, 40, 104]}

 94%|█████████▍| 939997/1000000 [9:10:31<27:10, 36.79it/s]global step 940000, trans_decision ep_re 84.68684826456926

{"global_step": 940000, "eval_re": [29.884650093049572, 32.48688556350031, 
45.92867644812843, 28.35535274607197, 52.96788964474275, 134.03109740940107, 
31.864650987667723, 216.9872482784608, 46.173928229992995, 228.18810324467702], 
"eval_len": [40, 35, 49, 35, 52, 92, 33, 117, 51, 115]}

 95%|█████████▍| 949997/1000000 [9:16:33<22:37, 36.83it/s]global step 950000, trans_decision ep_re 157.07835897025103

{"global_step": 950000, "eval_re": [148.07030140998998, 411.878948915498, 
29.133314642006265, 45.511050996458565, 355.84162096965935, 33.235547579153526, 
49.74518422132861, 365.4363975181187, 74.68452500235286, 57.24669844794437], 
"eval_len": [94, 169, 35, 47, 156, 35, 56, 159, 65, 60]}

 96%|█████████▌| 959996/1000000 [9:22:14<18:19, 36.38it/s]global step 960000, trans_decision ep_re 65.58742553689028

{"global_step": 960000, "eval_re": [61.0678008216364, 44.273823326924045, 
79.93199613975628, 45.98180385865852, 64.12102651914768, 53.679948115810966, 
36.15106763896296, 27.336348972582236, 33.01121893988647, 210.31922103553728], 
"eval_len": [58, 49, 73, 49, 59, 59, 38, 34, 38, 106]}

 97%|█████████▋| 969996/1000000 [9:28:05<13:37, 36.68it/s]global step 970000, trans_decision ep_re 115.09948372934605

{"global_step": 970000, "eval_re": [302.25372577290136, 153.20637539394392, 
43.928289695582045, 135.35879600957705, 136.1359097513727, 131.58198182584837, 
36.896308112261465, 146.22962973467972, 31.41902629749312, 33.98479469980079], 
"eval_len": [148, 94, 49, 93, 87, 90, 44, 94, 40, 40]}

 98%|█████████▊| 979996/1000000 [9:33:57<09:06, 36.57it/s]global step 980000, trans_decision ep_re 95.38179721365988

{"global_step": 980000, "eval_re": [71.33765189434011, 36.519333984055095, 
32.62928405671094, 301.583152581776, 35.17625251798245, 87.63469395612184, 
36.974645286847526, 186.71173616851823, 31.330861272499803, 133.9203604177468], 
"eval_len": [65, 37, 41, 142, 34, 75, 42, 108, 34, 84]}

 99%|█████████▉| 989999/1000000 [9:39:48<04:31, 36.81it/s]global step 990000, trans_decision ep_re 165.81078105442606

{"global_step": 990000, "eval_re": [370.119967208119, 29.010656548097696, 
148.32005421806008, 114.68473603404621, 35.186561247421, 222.43838255451894, 
37.346496896495495, 253.22842403958353, 342.68372117716285, 105.08881062075558],
"eval_len": [176, 38, 101, 87, 41, 125, 39, 131, 146, 84]}

100%|█████████▉| 999998/1000000 [9:45:41<00:00, 36.60it/s]global step 1000000, trans_decision ep_re 152.14168899913517

{"global_step": 1000000, "eval_re": [373.34873241564947, 29.442886991803547, 
56.07065109135285, 444.9478876662062, 34.680932839596935, 60.40815251952519, 
44.925614564363826, 45.132514456299184, 30.121337503315996, 402.3381799432383], 
"eval_len": [165, 33, 56, 174, 45, 55, 49, 45, 34, 171]}

100%|██████████| 1000000/1000000 [9:45:51<00:00, 28.45it/s]
