
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.15
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9998/1000000 [03:20<8:30:35, 32.32it/s]global step 10000, trans_decision ep_re 103.55352993052179

{"global_step": 10000, "eval_re": [129.40387724049933, 12.421848635150814, 
152.21845231340882, 87.26451779053059, 89.08664056359342, 205.91713931530873, 
39.298517187642496, 157.2126061333082, 115.65589807559851, 47.05580205017713], 
"eval_len": [91, 17, 106, 78, 66, 124, 45, 106, 88, 45]}

  2%|▏         | 19997/1000000 [09:57<8:16:45, 32.88it/s]global step 20000, trans_decision ep_re 94.05540559397534

{"global_step": 20000, "eval_re": [58.24695105975087, 80.74417783836603, 
14.887479588019495, 62.31417346605594, 91.21891604522409, 298.97800922695495, 
58.09146954869747, 76.79943957617635, 108.65832819403383, 90.61511139647435], 
"eval_len": [43, 79, 16, 45, 75, 142, 45, 54, 66, 64]}

  3%|▎         | 29996/1000000 [16:35<8:18:08, 32.45it/s]global step 30000, trans_decision ep_re 121.81754136773057

{"global_step": 30000, "eval_re": [14.010813338254268, 217.07746119694727, 
12.919692030430326, 198.70065345670434, 78.88312429340355, 202.98512499616618, 
145.51590641225073, 81.90299802327394, 169.70487565989703, 96.47476426997808], 
"eval_len": [20, 101, 18, 106, 73, 102, 89, 57, 123, 75]}

  4%|▍         | 39997/1000000 [23:13<8:06:50, 32.86it/s]global step 40000, trans_decision ep_re 107.80992403782825

{"global_step": 40000, "eval_re": [137.316562490403, 71.15111888447593, 
197.42414736306722, 93.65778334912692, 113.18369271657828, 58.114008362908734, 
42.717508765959096, 123.63578225867619, 86.76812727302173, 154.13050891406542], 
"eval_len": [81, 49, 114, 67, 62, 55, 40, 83, 64, 89]}

  5%|▍         | 49996/1000000 [29:52<8:01:15, 32.90it/s]global step 50000, trans_decision ep_re 189.8074032250371

{"global_step": 50000, "eval_re": [180.49197854965087, 193.4560702161162, 
142.20046895692994, 235.5524111810233, 143.61642036487893, 131.15799877007527, 
335.1867897493681, 112.48696606314749, 375.4966989048115, 48.42822949436936], 
"eval_len": [123, 142, 81, 154, 112, 95, 192, 102, 196, 44]}

  6%|▌         | 59997/1000000 [36:32<8:02:49, 32.45it/s]global step 60000, trans_decision ep_re 201.10156333671267

{"global_step": 60000, "eval_re": [125.69445995362305, 76.81547647069415, 
234.1401375913851, 146.98270218845732, 134.61129309417583, 185.65002033105753, 
146.1988480440334, 158.88617294544173, 531.9301268493782, 270.1063958988804], 
"eval_len": [79, 57, 146, 105, 86, 115, 86, 94, 253, 175]}

  7%|▋         | 69999/1000000 [43:12<7:50:15, 32.96it/s]global step 70000, trans_decision ep_re 160.35698541872864

{"global_step": 70000, "eval_re": [10.02215198274253, 223.48005937759189, 
353.43791544693875, 160.56038144181292, 154.07605154913597, 8.587190403642218, 
315.38795098692276, 13.07051981205059, 90.61853788624656, 274.3290953002024], 
"eval_len": [12, 142, 209, 93, 84, 12, 219, 15, 77, 169]}

  8%|▊         | 79998/1000000 [49:52<7:53:24, 32.39it/s]global step 80000, trans_decision ep_re 83.0521342359168

{"global_step": 80000, "eval_re": [129.1914655940695, 72.50914303147871, 
80.67860695283846, 40.57959154342755, 63.526739898290764, 105.27345954794919, 
98.10221774136885, 77.71152015793707, 86.60948552987337, 76.33911236193453], 
"eval_len": [83, 54, 68, 47, 55, 78, 55, 49, 78, 44]}

  9%|▉         | 89997/1000000 [56:30<7:41:36, 32.86it/s]global step 90000, trans_decision ep_re 77.40845590797208

{"global_step": 90000, "eval_re": [21.414729881168558, 13.447233214557153, 
47.746770303676506, 38.28521232800633, 149.05263926585923, 89.5909713844443, 
41.5149771881566, 12.353720297004225, 285.2890291950789, 75.38927602176912], 
"eval_len": [20, 17, 46, 67, 100, 82, 33, 15, 141, 88]}

 10%|▉         | 99996/1000000 [1:03:09<7:36:38, 32.85it/s]global step 100000, trans_decision ep_re 85.53396039857267

{"global_step": 100000, "eval_re": [68.82441789974355, 15.84813731516549, 
61.19170767228957, 174.7856390088724, 35.71543254560979, 58.57059720501627, 
88.64690335266904, 13.651367255957103, 196.9552286734093, 141.1501730569943], 
"eval_len": [68, 16, 42, 89, 31, 64, 53, 19, 103, 89]}

 11%|█         | 109999/1000000 [1:09:47<7:30:22, 32.94it/s]global step 110000, trans_decision ep_re 99.81382389493004

{"global_step": 110000, "eval_re": [131.18322915828037, 53.17070402321105, 
36.463241182767966, 12.50455148839427, 36.04183667247912, 43.95468300575085, 
41.38895316661852, 367.3608298268793, 245.92689035800356, 30.14332006691534], 
"eval_len": [84, 53, 47, 22, 33, 37, 37, 161, 143, 27]}

 12%|█▏        | 119999/1000000 [1:16:25<7:32:38, 32.40it/s]global step 120000, trans_decision ep_re 84.60470889866049

{"global_step": 120000, "eval_re": [241.69648681211777, 14.475687185321405, 
50.008604545006406, 46.097302563900236, 15.413870890655556, 62.631541349800536, 
48.39905273232442, 146.3877638345822, 43.07884285876224, 177.85793621413404], 
"eval_len": [114, 18, 51, 43, 18, 60, 59, 82, 45, 105]}

 13%|█▎        | 129999/1000000 [1:23:04<7:28:18, 32.34it/s]global step 130000, trans_decision ep_re 109.15368691546912

{"global_step": 130000, "eval_re": [40.834755342567796, 145.9029127340273, 
43.59939036720247, 12.537550491382682, 15.345721764887406, 131.64250887596356, 
260.2770237072109, 140.39780549701518, 146.97243989037594, 154.02676048405814], 
"eval_len": [37, 100, 40, 21, 16, 78, 137, 84, 80, 84]}

 14%|█▍        | 139998/1000000 [1:29:42<7:16:21, 32.85it/s]global step 140000, trans_decision ep_re 141.56297759913937

{"global_step": 140000, "eval_re": [48.88300663611752, 52.52918953883732, 
210.48074037143508, 192.80559399480296, 106.32861633019319, 213.52421931276905, 
410.43164886960597, 14.320246207849733, 154.4185229379749, 11.907991791807868], 
"eval_len": [53, 54, 96, 94, 60, 106, 190, 15, 79, 17]}

 15%|█▍        | 149997/1000000 [1:36:21<7:11:02, 32.87it/s]global step 150000, trans_decision ep_re 105.53127151406707

{"global_step": 150000, "eval_re": [160.85025870784685, 175.8013851462252, 
272.0172036014125, 27.310689610899257, 13.420476881217335, 23.989637325188472, 
74.0793663623425, 10.71202584629214, 91.73064804258978, 205.4010236166567], 
"eval_len": [94, 120, 122, 27, 17, 25, 74, 14, 58, 107]}

 16%|█▌        | 159996/1000000 [1:43:10<7:06:32, 32.82it/s]global step 160000, trans_decision ep_re 83.8468469691505

{"global_step": 160000, "eval_re": [299.59527821312264, 48.726745564925366, 
160.99788061694994, 147.43851811708925, 12.133202767869207, 49.45488682565797, 
12.6222423560882, 46.87474434639976, 48.91058715271941, 11.714383730683384], 
"eval_len": [157, 57, 105, 85, 14, 39, 20, 52, 49, 18]}

 17%|█▋        | 169999/1000000 [1:49:37<6:59:51, 32.95it/s]global step 170000, trans_decision ep_re 107.05915627907504

{"global_step": 170000, "eval_re": [24.871344749399817, 20.10367830441725, 
79.7979873885511, 175.6469910334832, 167.4636289749699, 158.5114022774016, 
217.00529742128836, 29.298208839014542, 182.7669441676862, 15.126079634538515], 
"eval_len": [25, 24, 77, 111, 88, 92, 114, 38, 114, 21]}

 18%|█▊        | 179999/1000000 [1:56:17<7:04:38, 32.18it/s]global step 180000, trans_decision ep_re 130.50018675003497

{"global_step": 180000, "eval_re": [252.19838731620098, 131.73161387773428, 
23.282741413533305, 129.62742261448895, 167.7809086130742, 19.629492254172668, 
477.7760844391112, 50.672072774510276, 22.41064684609303, 29.892497351431032], 
"eval_len": [113, 75, 26, 80, 88, 27, 180, 39, 28, 32]}

 19%|█▉        | 189998/1000000 [2:02:56<6:51:35, 32.80it/s]global step 190000, trans_decision ep_re 95.86720664669001

{"global_step": 190000, "eval_re": [11.50058250355171, 257.34466094824796, 
32.79942737971832, 204.53327718460554, 96.80773822307752, 17.407155434126935, 
16.235658595018855, 257.1528691471868, 17.294684856941625, 47.59601219442487], 
"eval_len": [17, 154, 29, 118, 82, 23, 17, 127, 19, 55]}

 20%|█▉        | 199997/1000000 [2:09:35<6:45:34, 32.87it/s]global step 200000, trans_decision ep_re 140.24434181069532

{"global_step": 200000, "eval_re": [223.4489901278249, 212.99975269703833, 
181.41691873479184, 193.6956940224932, 109.23186148767579, 97.040971848323, 
128.80745251732188, 12.001837346269129, 72.23705631738706, 171.5628830078282], 
"eval_len": [111, 162, 102, 122, 63, 71, 80, 15, 62, 83]}

 21%|██        | 209999/1000000 [2:16:14<6:37:18, 33.14it/s]global step 210000, trans_decision ep_re 106.03308968765108

{"global_step": 210000, "eval_re": [65.36031693229886, 13.06716660659914, 
172.24019584930258, 65.36633281513397, 10.235430010775348, 141.1740670418091, 
133.03764478414976, 267.14760386821064, 163.77328369080038, 28.92885527743103], 
"eval_len": [46, 18, 99, 60, 14, 88, 81, 157, 89, 36]}

 22%|██▏       | 219998/1000000 [2:22:53<6:35:04, 32.91it/s]global step 220000, trans_decision ep_re 94.0694805993638

{"global_step": 220000, "eval_re": [25.75191657029386, 61.562950016232854, 
153.44105949398661, 11.833186056072586, 12.382989558010753, 78.36256784574921, 
180.04620240300434, 73.58047821515457, 290.7337653486456, 52.99969048648772], 
"eval_len": [31, 54, 78, 17, 17, 62, 96, 62, 134, 59]}

 23%|██▎       | 229997/1000000 [2:29:32<6:37:44, 32.27it/s]global step 230000, trans_decision ep_re 126.90939671419576

{"global_step": 230000, "eval_re": [42.27911260775329, 196.00831765072186, 
11.982168960724776, 225.04097125988227, 111.420705810496, 147.06612171985532, 
215.83282539879656, 151.00592766916034, 151.2432474752102, 17.21456858935695], 
"eval_len": [44, 105, 16, 126, 73, 90, 108, 85, 94, 17]}

 24%|██▍       | 239999/1000000 [2:36:11<6:22:03, 33.15it/s]global step 240000, trans_decision ep_re 87.4908144905471

{"global_step": 240000, "eval_re": [78.88912063512304, 13.614220337334938, 
236.9568527917762, 204.560770740458, 38.31749389536044, 70.37155210197078, 
76.94028382937879, 60.41282814188417, 32.63962224594247, 62.20540018624209], 
"eval_len": [71, 15, 108, 125, 41, 74, 55, 53, 38, 54]}

 25%|██▍       | 249998/1000000 [2:42:50<6:19:40, 32.92it/s]global step 250000, trans_decision ep_re 106.61619330041829

{"global_step": 250000, "eval_re": [42.96392988788958, 65.60971722835185, 
14.837137162702852, 17.545964882133852, 279.0238621351495, 187.34259711000556, 
163.47984585365754, 109.87877659354137, 107.40917371133014, 78.07092843942063], 
"eval_len": [50, 62, 25, 18, 133, 110, 90, 91, 69, 70]}

 26%|██▌       | 259997/1000000 [2:49:40<6:13:52, 32.99it/s]global step 260000, trans_decision ep_re 129.53759517809084

{"global_step": 260000, "eval_re": [204.0188272819685, 10.701344168968776, 
68.58545923125227, 172.64501109345105, 173.2674171734005, 61.930870749955254, 
49.69643932215195, 312.4094618955889, 71.42418910123597, 170.69693176293498], 
"eval_len": [105, 12, 48, 93, 83, 51, 50, 152, 47, 99]}

 27%|██▋       | 269999/1000000 [2:56:08<6:08:24, 33.03it/s]global step 270000, trans_decision ep_re 105.73245342924899

{"global_step": 270000, "eval_re": [135.01228750787985, 101.26820473133023, 
59.758810398235184, 171.7268111206219, 126.4937012042085, 47.137114704947436, 
55.109565789512445, 109.08266798582174, 100.3589394608635, 151.37643138906932], 
"eval_len": [79, 56, 44, 109, 74, 42, 59, 66, 60, 98]}

 28%|██▊       | 279999/1000000 [3:02:48<6:10:24, 32.40it/s]global step 280000, trans_decision ep_re 83.73467657432221

{"global_step": 280000, "eval_re": [188.0878237325136, 16.423755188621595, 
49.93386621700928, 113.82272021048537, 25.74057158768625, 179.61094920898304, 
61.90079963002731, 65.85156348875212, 121.20384308418322, 14.770873394960244], 
"eval_len": [114, 18, 57, 61, 26, 102, 66, 62, 79, 18]}

 29%|██▉       | 289998/1000000 [3:09:27<5:59:09, 32.95it/s]global step 290000, trans_decision ep_re 133.23194623290104

{"global_step": 290000, "eval_re": [126.5873470064874, 407.32706306179597, 
184.90492275756145, 89.84893264304723, 141.45061333725982, 127.12393727193547, 
79.98602602479474, 13.771431621534584, 13.931673486839271, 147.38751511775413], 
"eval_len": [85, 187, 89, 63, 97, 70, 56, 15, 16, 83]}

 30%|██▉       | 299997/1000000 [3:16:06<5:55:24, 32.83it/s]global step 300000, trans_decision ep_re 125.86843594562644

{"global_step": 300000, "eval_re": [27.745151247511536, 151.7965746163066, 
30.601731966892256, 120.2605377100029, 216.4046714930912, 149.25479371565712, 
217.77895270800542, 63.17908596944149, 122.53861354356827, 159.1242464857878], 
"eval_len": [39, 83, 31, 67, 107, 79, 124, 63, 100, 76]}

 31%|███       | 309999/1000000 [3:22:46<5:50:27, 32.81it/s]global step 310000, trans_decision ep_re 156.475260733768

{"global_step": 310000, "eval_re": [65.53537985460162, 34.28833783027263, 
437.5776509019415, 209.14656290871983, 44.03686194188472, 184.50974274838958, 
47.0379202025466, 64.00183683504915, 129.57909889333789, 349.03921522093646], 
"eval_len": [66, 40, 191, 129, 50, 95, 44, 47, 75, 166]}

 32%|███▏      | 319998/1000000 [3:29:26<5:45:07, 32.84it/s]global step 320000, trans_decision ep_re 154.46643072109214

{"global_step": 320000, "eval_re": [178.5110691189917, 335.75893576359164, 
114.33069790984489, 168.8423894072876, 150.30938795919258, 201.20014012812462, 
17.71788345414915, 143.7424104174999, 77.20860877117919, 157.04278428105988], 
"eval_len": [102, 148, 65, 106, 89, 102, 17, 76, 52, 82]}

 33%|███▎      | 329997/1000000 [3:36:06<5:47:45, 32.11it/s]global step 330000, trans_decision ep_re 142.89190148551322

{"global_step": 330000, "eval_re": [12.886102691352614, 158.0164751373478, 
164.75902506333009, 54.31517454903803, 175.72763800677924, 44.44500385124727, 
39.53389145416057, 12.166678455660005, 316.7511243899001, 450.3179012563167], 
"eval_len": [17, 97, 98, 51, 86, 50, 39, 16, 145, 198]}

 34%|███▍      | 339999/1000000 [3:42:45<5:37:24, 32.60it/s]global step 340000, trans_decision ep_re 136.81453043881905

{"global_step": 340000, "eval_re": [83.73339890420867, 325.6650737210212, 
33.9138485908796, 70.87107448360723, 188.8993323596738, 141.31274178268106, 
224.2183113364249, 12.480663642837525, 241.6774327664191, 45.37342680043726], 
"eval_len": [67, 154, 38, 43, 101, 75, 112, 15, 128, 57]}

 35%|███▍      | 349998/1000000 [3:49:24<5:29:03, 32.92it/s]global step 350000, trans_decision ep_re 114.38098111742147

{"global_step": 350000, "eval_re": [163.32348595483197, 133.13894710330558, 
45.76913255323175, 42.72429209416114, 128.9658126192851, 51.28293389364871, 
164.93075460100303, 210.15107664605893, 17.79758105641609, 185.72579465227238], 
"eval_len": [93, 73, 51, 48, 80, 58, 96, 119, 21, 99]}

 36%|███▌      | 359997/1000000 [3:56:04<5:30:33, 32.27it/s]global step 360000, trans_decision ep_re 151.81019188771742

{"global_step": 360000, "eval_re": [207.33579057677963, 200.05465328251648, 
186.82669559095555, 18.92494054334871, 173.1351807671697, 115.7157880273539, 
208.35499054761425, 211.05460949103272, 118.2914514003631, 78.40781865004037], 
"eval_len": [108, 108, 90, 20, 116, 74, 105, 100, 72, 57]}

 37%|███▋      | 369999/1000000 [4:02:45<5:18:01, 33.02it/s]global step 370000, trans_decision ep_re 91.31364486768318

{"global_step": 370000, "eval_re": [129.82250748162411, 13.705975723821952, 
21.373705733294617, 14.265192354157799, 43.635285806194744, 14.028644735370525, 
26.074542643940603, 433.7450167466424, 182.6670548495334, 33.81852260225165], 
"eval_len": [79, 19, 28, 17, 52, 15, 33, 192, 85, 39]}

 38%|███▊      | 379999/1000000 [4:09:23<5:20:55, 32.20it/s]global step 380000, trans_decision ep_re 116.11171225270964

{"global_step": 380000, "eval_re": [179.26471220590656, 108.74053628974374, 
56.28138143263702, 283.1386883000654, 124.9866973578893, 28.421098818805252, 
11.003563173526883, 84.85416173612641, 24.94427169510695, 259.48201151728875], 
"eval_len": [108, 62, 52, 131, 76, 36, 13, 48, 33, 116]}

 39%|███▉      | 389998/1000000 [4:16:03<5:13:58, 32.38it/s]global step 390000, trans_decision ep_re 72.76481655379827

{"global_step": 390000, "eval_re": [13.400744171886345, 128.6237911875409, 
16.095230813763973, 18.052895518232038, 15.166509793717294, 222.67072165413293, 
152.31337546647055, 131.5110512237203, 16.415479483771463, 13.398366224746898], 
"eval_len": [16, 67, 22, 21, 21, 119, 92, 68, 18, 19]}

 40%|███▉      | 399998/1000000 [4:22:41<5:04:40, 32.82it/s]global step 400000, trans_decision ep_re 76.80805246158127

{"global_step": 400000, "eval_re": [110.44274196465733, 15.450194880937733, 
12.032057866197643, 212.44881377674236, 10.062983292586319, 115.55174430123463, 
57.22905361961798, 68.35230268638145, 145.5028415646306, 21.00779066282663], 
"eval_len": [59, 17, 15, 107, 14, 63, 62, 49, 83, 22]}

 41%|████      | 409998/1000000 [4:29:30<5:00:01, 32.77it/s]global step 410000, trans_decision ep_re 83.22409256337014

{"global_step": 410000, "eval_re": [9.484265056939945, 14.024325320016015, 
229.3200371967098, 192.31703508206283, 16.026275563490184, 72.96178005618381, 
181.88114842431668, 13.146935331968733, 91.755113439261, 11.324010162752291], 
"eval_len": [12, 25, 124, 93, 19, 46, 100, 16, 70, 16]}

 42%|████▏     | 419999/1000000 [4:35:56<4:54:17, 32.85it/s]global step 420000, trans_decision ep_re 69.10093601049324

{"global_step": 420000, "eval_re": [16.43187475424576, 205.2428359876925, 
42.09679501872396, 14.819587550576236, 116.30771535700457, 155.68644412412326, 
24.067960378914176, 22.234531752426903, 25.152479406202847, 68.96913577502222], 
"eval_len": [18, 119, 51, 18, 75, 98, 28, 24, 31, 52]}

 43%|████▎     | 429999/1000000 [4:42:32<4:48:09, 32.97it/s]global step 430000, trans_decision ep_re 44.47160299116898

{"global_step": 430000, "eval_re": [11.419571627805151, 15.899453339221642, 
12.512649873513885, 13.023809539075778, 87.19091925529794, 165.94363454336226, 
31.027303586514744, 10.132326353502984, 56.64337796182942, 40.922983831565986], 
"eval_len": [16, 21, 15, 23, 66, 98, 35, 12, 62, 68]}

 44%|████▍     | 439999/1000000 [4:49:08<4:43:23, 32.94it/s]global step 440000, trans_decision ep_re 97.06837516481734

{"global_step": 440000, "eval_re": [15.000731093040683, 423.2997195536593, 
74.16771601573053, 134.38496476225652, 129.30371573590946, 56.18264170826036, 
12.276610892608558, 28.646215657205698, 80.27328621800098, 17.148150011501322], 
"eval_len": [17, 170, 43, 87, 84, 52, 16, 30, 68, 20]}

 45%|████▍     | 449999/1000000 [4:55:45<4:38:02, 32.97it/s]global step 450000, trans_decision ep_re 83.27665461089663

{"global_step": 450000, "eval_re": [164.7952580800828, 85.31418264959386, 
38.781456424226825, 205.58638974869586, 22.39368102801622, 227.12449525781625, 
29.94749935059463, 14.256603280669104, 11.544638334278044, 33.02234195499273], 
"eval_len": [85, 68, 41, 94, 28, 100, 31, 17, 17, 30]}

 46%|████▌     | 459999/1000000 [5:02:22<4:33:48, 32.87it/s]global step 460000, trans_decision ep_re 113.05287436101314

{"global_step": 460000, "eval_re": [12.893687812110958, 139.14269089259716, 
51.6878640071456, 49.82859270750223, 172.98209761509187, 116.78973353076803, 
210.21207355303034, 143.8645768680609, 177.5399189544447, 55.58750766937973], 
"eval_len": [17, 88, 46, 58, 102, 68, 106, 77, 118, 59]}

 47%|████▋     | 469998/1000000 [5:09:10<4:27:50, 32.98it/s]global step 470000, trans_decision ep_re 80.56595085437738

{"global_step": 470000, "eval_re": [47.21603150460224, 33.30601190195045, 
140.6641613853306, 211.63666702412675, 38.6199051382171, 70.94474212952133, 
32.97556250528049, 29.187182952829726, 170.97616777624606, 30.133076225668997], 
"eval_len": [41, 48, 97, 110, 46, 68, 43, 30, 85, 37]}

 48%|████▊     | 479996/1000000 [5:15:36<4:23:49, 32.85it/s]global step 480000, trans_decision ep_re 81.83436758796185

{"global_step": 480000, "eval_re": [92.77899228961492, 81.9964709246658, 
119.01235165446727, 84.31208700468869, 48.08638448504867, 55.053173536407535, 
71.32544665345083, 14.345745661885523, 16.048443548433006, 235.38458012095617], 
"eval_len": [58, 74, 84, 61, 42, 63, 62, 19, 18, 118]}

 49%|████▉     | 489996/1000000 [5:22:13<4:17:54, 32.96it/s]global step 490000, trans_decision ep_re 65.51030277233171

{"global_step": 490000, "eval_re": [12.367892053070866, 87.5490829594638, 
15.743880491213652, 18.859139002325062, 14.651778502266412, 189.53113131558737, 
23.558160875341446, 15.171752130757135, 202.57263042474708, 75.09757996854432], 
"eval_len": [17, 53, 19, 20, 16, 87, 35, 16, 112, 62]}

 50%|████▉     | 499996/1000000 [5:29:00<4:14:05, 32.80it/s]global step 500000, trans_decision ep_re 111.50773197164105

{"global_step": 500000, "eval_re": [421.94828150558027, 11.79200437873176, 
14.80540981800342, 78.95888129177932, 15.23554329325134, 82.94682570115893, 
135.12390417459812, 17.01936215435789, 191.86826174359146, 145.37884565535813], 
"eval_len": [177, 18, 16, 73, 17, 67, 73, 17, 108, 87]}

 51%|█████     | 509999/1000000 [5:35:27<4:08:11, 32.90it/s]global step 510000, trans_decision ep_re 46.11807530475466

{"global_step": 510000, "eval_re": [128.14027643149515, 117.6777498728846, 
29.57946933769368, 24.97667921058413, 17.28352693003201, 17.14071928964249, 
54.83857768017643, 21.025536933978533, 19.90893119359975, 30.609286167459775], 
"eval_len": [76, 68, 29, 28, 23, 21, 50, 23, 22, 29]}

 52%|█████▏    | 519999/1000000 [5:42:04<4:03:21, 32.87it/s]global step 520000, trans_decision ep_re 109.18362092068689

{"global_step": 520000, "eval_re": [138.61876339925797, 169.0436502810232, 
25.70570394132043, 12.781704382351542, 194.76478595309214, 153.5910663981971, 
8.217610928886588, 88.45122679406535, 148.6853309082142, 151.9763662204605], 
"eval_len": [78, 92, 30, 16, 99, 82, 11, 72, 89, 88]}

 53%|█████▎    | 529999/1000000 [5:48:43<3:58:00, 32.91it/s]global step 530000, trans_decision ep_re 120.83308990510298

{"global_step": 530000, "eval_re": [158.7443850572979, 40.4962912015229, 
162.9292603634556, 380.20849035848295, 50.43922923700194, 21.486414336897354, 
18.067248537640666, 230.46022571389113, 128.95742087757546, 16.541933367263628],
"eval_len": [95, 38, 88, 160, 39, 25, 22, 136, 68, 17]}

 54%|█████▍    | 539998/1000000 [5:55:20<3:52:13, 33.01it/s]global step 540000, trans_decision ep_re 100.2438303114658

{"global_step": 540000, "eval_re": [22.205532168573644, 65.92328528738769, 
192.04437168907347, 56.02710359492094, 190.3359029918763, 49.7774099567532, 
168.98715930530872, 182.47126491554008, 46.059119123093176, 28.607154082130787],
"eval_len": [31, 73, 111, 58, 109, 48, 108, 101, 37, 28]}

 55%|█████▍    | 549997/1000000 [6:01:57<3:48:16, 32.85it/s]global step 550000, trans_decision ep_re 168.92601872376278

{"global_step": 550000, "eval_re": [29.828063011518193, 40.38155893577968, 
31.42054533512603, 39.94971914383002, 18.415464058742984, 28.83706064203357, 
51.6719063147628, 340.24047697027055, 948.7798803427885, 159.7355124827756], 
"eval_len": [41, 41, 38, 47, 33, 38, 59, 171, 344, 100]}

 56%|█████▌    | 559996/1000000 [6:08:34<3:42:56, 32.89it/s]global step 560000, trans_decision ep_re 67.47225335031786

{"global_step": 560000, "eval_re": [13.591375102541493, 26.834308088432227, 
47.62277575451596, 86.60202493047306, 19.271909973575294, 14.966736879503147, 
62.5268778032806, 151.0447171214898, 218.14147999815347, 34.120327851213496], 
"eval_len": [17, 40, 45, 55, 27, 17, 75, 99, 99, 41]}

 57%|█████▋    | 569996/1000000 [6:15:20<3:37:39, 32.93it/s]global step 570000, trans_decision ep_re 65.27396574120624

{"global_step": 570000, "eval_re": [97.31318067876389, 23.627776056089754, 
166.77239714791048, 18.382911209699326, 18.07685513965335, 22.690478380874794, 
16.497142989552245, 29.585875798161876, 72.9585225327603, 186.8345174785965], 
"eval_len": [59, 24, 98, 21, 23, 23, 23, 25, 65, 94]}

 58%|█████▊    | 579996/1000000 [6:21:45<3:33:17, 32.82it/s]global step 580000, trans_decision ep_re 120.94141997082264

{"global_step": 580000, "eval_re": [199.97042467789754, 134.62360247852595, 
20.288558068392994, 497.7471080181113, 147.6588088488054, 56.32245760571102, 
20.203934220519496, 89.75121153488143, 19.238062463952655, 23.610031791428565], 
"eval_len": [105, 70, 23, 207, 99, 55, 26, 60, 27, 26]}

 59%|█████▉    | 589999/1000000 [6:28:24<3:27:11, 32.98it/s]global step 590000, trans_decision ep_re 74.97318134655225

{"global_step": 590000, "eval_re": [13.119017028801503, 26.378110240380497, 
43.78613815391117, 11.696321889590314, 20.441367066896976, 247.91382620200002, 
10.041808142110044, 13.71843859579124, 184.9894346992017, 177.64735144683905], 
"eval_len": [16, 27, 42, 18, 22, 120, 15, 19, 108, 93]}

 60%|█████▉    | 599999/1000000 [6:35:00<3:22:45, 32.88it/s]global step 600000, trans_decision ep_re 145.99796960008607

{"global_step": 600000, "eval_re": [43.17901114318145, 69.72645912667981, 
119.88236497638931, 29.192516707108357, 37.1149364221859, 230.4591113077722, 
54.68781407393618, 246.1970462611004, 187.4696349062154, 442.07080107629196], 
"eval_len": [39, 55, 69, 30, 42, 128, 71, 121, 92, 212]}

 61%|██████    | 609999/1000000 [6:41:37<3:18:03, 32.82it/s]global step 610000, trans_decision ep_re 101.84136018104229

{"global_step": 610000, "eval_re": [111.64324717362076, 102.3726980897656, 
109.48587004113773, 119.47640817318712, 15.050700998534465, 188.29913977142968, 
68.38457941028815, 17.533844392934686, 125.53556804070665, 160.63154571881813], 
"eval_len": [61, 73, 64, 83, 22, 83, 44, 20, 72, 97]}

 62%|██████▏   | 619998/1000000 [6:48:13<3:12:24, 32.92it/s]global step 620000, trans_decision ep_re 128.90041466693407

{"global_step": 620000, "eval_re": [211.4161196150597, 158.68269719931317, 
191.25614452258813, 12.364708956215438, 147.73534462940265, 89.09601658012406, 
189.91366815274608, 15.343523141934785, 11.70064082281452, 261.495283049142], 
"eval_len": [108, 99, 104, 15, 86, 51, 111, 17, 17, 139]}

 63%|██████▎   | 629997/1000000 [6:55:00<3:07:34, 32.88it/s]global step 630000, trans_decision ep_re 178.28282143664316

{"global_step": 630000, "eval_re": [13.827826551594603, 361.6737170057885, 
107.29083288417226, 178.29654669332112, 161.29843662321005, 417.8924446736908, 
190.28735476947284, 153.94499047670996, 136.90305120005078, 61.4130134884205], 
"eval_len": [19, 187, 58, 117, 103, 162, 120, 87, 91, 46]}

 64%|██████▍   | 639999/1000000 [7:01:26<3:02:31, 32.87it/s]global step 640000, trans_decision ep_re 72.14963733650572

{"global_step": 640000, "eval_re": [156.6234822036537, 159.28556857129215, 
14.589959671001921, 10.965156926800383, 53.2286398135965, 34.74379835444285, 
174.67193348692595, 94.93543060579876, 11.74207016128442, 10.71033357026056], 
"eval_len": [109, 108, 20, 16, 51, 40, 97, 61, 15, 13]}

 65%|██████▍   | 649999/1000000 [7:08:01<2:55:35, 33.22it/s]global step 650000, trans_decision ep_re 99.79872395855658

{"global_step": 650000, "eval_re": [15.574767695134497, 26.780830837565375, 
25.45890694794922, 208.57287448640562, 326.7376356771255, 119.85326834115257, 
202.7675018165708, 45.0230411454947, 13.938157501863339, 13.280255136304296], 
"eval_len": [19, 36, 36, 99, 153, 75, 123, 50, 16, 19]}

 66%|██████▌   | 659998/1000000 [7:14:50<2:52:44, 32.81it/s]global step 660000, trans_decision ep_re 137.95275199142685

{"global_step": 660000, "eval_re": [134.49177505496039, 33.11428633926811, 
225.49207060755472, 15.690031001427355, 11.37561506474876, 100.90428312384812, 
242.8126175699227, 332.9298824785143, 140.79503910451874, 141.9219195695053], 
"eval_len": [79, 35, 108, 17, 14, 67, 115, 152, 77, 95]}

 67%|██████▋   | 669999/1000000 [7:21:22<2:48:46, 32.59it/s]global step 670000, trans_decision ep_re 115.3601536848192

{"global_step": 670000, "eval_re": [104.17418498560448, 16.458571605077086, 
20.094758050355544, 11.317266015444973, 204.38576934262096, 94.9382703166468, 
128.222592422816, 76.09169265008964, 250.64446996519777, 247.27396149433858], 
"eval_len": [68, 19, 20, 13, 110, 65, 83, 56, 104, 103]}

 68%|██████▊   | 679997/1000000 [7:28:05<2:42:05, 32.90it/s]global step 680000, trans_decision ep_re 58.451431528996565

{"global_step": 680000, "eval_re": [17.00766818457287, 16.716721231122452, 
37.36614356118853, 17.726996065772195, 34.71172532798971, 21.40766215691926, 
49.412438881697994, 329.9944363961334, 44.94744377967794, 15.223079704891422], 
"eval_len": [22, 21, 36, 20, 30, 28, 42, 134, 34, 22]}

 69%|██████▉   | 689997/1000000 [7:34:41<2:36:56, 32.92it/s]global step 690000, trans_decision ep_re 76.76756228895255

{"global_step": 690000, "eval_re": [30.601999690591914, 39.800448609491426, 
31.575511021419352, 14.7499815551693, 11.546425699151222, 195.96591415380018, 
308.06387336973785, 13.009968902076244, 81.93389928672273, 40.42760060136515], 
"eval_len": [27, 42, 27, 17, 18, 96, 158, 14, 57, 32]}

 70%|██████▉   | 699997/1000000 [7:41:18<2:31:03, 33.10it/s]global step 700000, trans_decision ep_re 51.20073012385018

{"global_step": 700000, "eval_re": [176.6050436242474, 12.75679169594289, 
25.410956784067547, 22.320123976223925, 116.45441076683295, 47.378305574041946, 
33.64007596045044, 12.616282953410211, 37.0517760091407, 27.77353389414378], 
"eval_len": [89, 17, 37, 33, 63, 47, 36, 16, 39, 37]}

 71%|███████   | 709997/1000000 [7:47:54<2:35:28, 31.09it/s]global step 710000, trans_decision ep_re 23.334322027547397

{"global_step": 710000, "eval_re": [18.52847875432944, 37.273761320054874, 
19.996524089101413, 21.00924116247006, 15.558003071585926, 12.943706159525673, 
24.459454544312592, 48.67798029413016, 14.835714411966695, 20.060356467997178], 
"eval_len": [22, 36, 23, 25, 19, 21, 25, 43, 22, 22]}

 72%|███████▏  | 719997/1000000 [7:54:31<2:21:17, 33.03it/s]global step 720000, trans_decision ep_re 88.39935689744212

{"global_step": 720000, "eval_re": [50.059623830964256, 40.04511279272984, 
26.863575342295338, 29.0753911755117, 31.301222278522143, 284.5200839079837, 
46.86563375355311, 125.18748653617139, 142.81329619528807, 107.2621431614017], 
"eval_len": [49, 48, 29, 35, 40, 136, 48, 80, 72, 70]}

 73%|███████▎  | 729996/1000000 [8:01:07<2:16:19, 33.01it/s]global step 730000, trans_decision ep_re 87.6351870172032

{"global_step": 730000, "eval_re": [11.387959276039602, 164.0731089091146, 
68.73852538958444, 108.17431309815544, 40.661693456936355, 61.6558093118884, 
13.180840261475403, 147.95729104332003, 206.47363363906362, 54.04869578645405], 
"eval_len": [13, 104, 44, 62, 49, 62, 16, 74, 112, 57]}

 74%|███████▍  | 739999/1000000 [8:07:44<2:11:53, 32.86it/s]global step 740000, trans_decision ep_re 89.98985735203556

{"global_step": 740000, "eval_re": [82.7945085590744, 17.046698787903797, 
14.08734051646467, 108.39682251238759, 91.30431576322884, 53.15415687823034, 
169.55097204299517, 124.0321974367224, 118.70203810808701, 120.82952291526131], 
"eval_len": [69, 16, 25, 66, 67, 52, 86, 65, 84, 67]}

 75%|███████▍  | 749999/1000000 [8:14:20<2:06:49, 32.85it/s]global step 750000, trans_decision ep_re 80.65319364426072

{"global_step": 750000, "eval_re": [147.4061926588746, 137.11575278509844, 
241.91285169956532, 27.346300990312344, 12.218796567531546, 13.769456649477972, 
173.6223222770484, 15.205030330335068, 19.824815749054086, 18.110416735309546], 
"eval_len": [83, 80, 120, 26, 15, 16, 88, 21, 27, 23]}

 76%|███████▌  | 759999/1000000 [8:20:57<2:01:53, 32.81it/s]global step 760000, trans_decision ep_re 100.66537576681493

{"global_step": 760000, "eval_re": [100.87249732096257, 94.16848836646874, 
167.8806896876637, 296.819264524406, 11.733971202118592, 18.726194364071564, 
154.17926323720994, 120.55483766396806, 16.11593292500547, 25.60261837627468], 
"eval_len": [65, 53, 89, 143, 15, 30, 88, 90, 24, 27]}

 77%|███████▋  | 769999/1000000 [8:27:34<1:56:31, 32.90it/s]global step 770000, trans_decision ep_re 152.6680610345393

{"global_step": 770000, "eval_re": [100.47408582939367, 12.754239064735785, 
148.3981212298046, 166.2917125320793, 544.9961476557501, 15.405867881615105, 
58.56465752623475, 168.5449672890318, 29.8228828047989, 281.42792853194914], 
"eval_len": [68, 16, 91, 98, 242, 19, 58, 94, 33, 128]}

 78%|███████▊  | 779998/1000000 [8:34:12<1:51:26, 32.90it/s]global step 780000, trans_decision ep_re 70.65810538727114

{"global_step": 780000, "eval_re": [86.25226582920222, 93.4406922001594, 
10.987620979739884, 25.821935701021726, 168.7479661837951, 17.304472217857587, 
14.488017383804877, 58.13362358207083, 110.35986557497047, 121.04459422008915], 
"eval_len": [60, 60, 13, 29, 83, 19, 18, 51, 66, 73]}

 79%|███████▉  | 789998/1000000 [8:41:00<1:46:07, 32.98it/s]global step 790000, trans_decision ep_re 184.99309649841175

{"global_step": 790000, "eval_re": [98.64147878997336, 193.92581458977662, 
158.24030779760957, 212.8617254220204, 267.45364690960594, 112.8302542727637, 
157.0085164997321, 108.64114355972141, 281.9372631565837, 258.3908139863308], 
"eval_len": [72, 110, 95, 116, 127, 75, 97, 62, 123, 136]}

 80%|███████▉  | 799999/1000000 [8:47:27<1:41:11, 32.94it/s]global step 800000, trans_decision ep_re 57.08741834228839

{"global_step": 800000, "eval_re": [14.480324125053867, 123.56340056427375, 
124.2061269230932, 22.265739497036257, 20.479552659132644, 10.070222173320142, 
98.8250247193409, 45.45672533699314, 102.31685363563227, 9.2102137890078], 
"eval_len": [17, 79, 87, 23, 28, 13, 70, 43, 69, 11]}

 81%|████████  | 809999/1000000 [8:54:03<1:35:39, 33.10it/s]global step 810000, trans_decision ep_re 123.99308178477415

{"global_step": 810000, "eval_re": [149.66680116358873, 13.436852935190931, 
107.35323932522242, 270.19911828845477, 205.93796995928258, 168.5380612866661, 
146.78199918776684, 15.995510346918287, 97.12122667007075, 64.90003868458038], 
"eval_len": [85, 17, 71, 111, 105, 91, 81, 17, 68, 46]}

 82%|████████▏ | 819998/1000000 [9:00:40<1:31:12, 32.89it/s]global step 820000, trans_decision ep_re 63.372867331243526

{"global_step": 820000, "eval_re": [17.871050713993924, 30.25141986344589, 
14.21962994914935, 102.84264234056874, 17.993371243217275, 13.140744165162028, 
11.104727926895594, 158.66467433034526, 194.5780388078031, 73.0623739718541], 
"eval_len": [19, 40, 21, 62, 24, 20, 19, 91, 91, 73]}

 83%|████████▎ | 829998/1000000 [9:07:17<1:26:06, 32.90it/s]global step 830000, trans_decision ep_re 126.36995982886694

{"global_step": 830000, "eval_re": [68.22367158252372, 130.8464853025888, 
255.58511167225626, 137.02199020595987, 63.42314188827842, 162.20575953102912, 
9.693505790829759, 228.65987888951912, 60.705293049683476, 147.33476037600107], 
"eval_len": [70, 76, 129, 82, 64, 81, 16, 108, 50, 76]}

 84%|████████▍ | 839997/1000000 [9:13:55<1:21:18, 32.80it/s]global step 840000, trans_decision ep_re 43.00687706766671

{"global_step": 840000, "eval_re": [18.82847636482537, 14.613660597953182, 
17.288720674922743, 20.091210347681013, 15.761828376055359, 111.17579079286342, 
23.20482252651344, 137.87390129814173, 14.636042005736112, 56.59431769197469], 
"eval_len": [21, 16, 20, 25, 16, 69, 37, 83, 19, 52]}

 85%|████████▍ | 849997/1000000 [9:20:31<1:16:03, 32.87it/s]global step 850000, trans_decision ep_re 55.69763607434682

{"global_step": 850000, "eval_re": [27.468758028015692, 26.297837098098647, 
17.807020044495676, 143.8644741802173, 23.002490971795975, 11.844314133232752, 
62.31869304402235, 16.866876337434796, 37.90247423617641, 189.60342266997856], 
"eval_len": [24, 25, 20, 86, 27, 15, 68, 20, 30, 109]}

 86%|████████▌ | 859996/1000000 [9:27:09<1:11:01, 32.85it/s]global step 860000, trans_decision ep_re 29.185701865909685

{"global_step": 860000, "eval_re": [35.1901720180795, 18.98064085498809, 
29.261976970283225, 28.6743675482939, 32.233029666595755, 25.9914764310015, 
52.12476856616726, 21.808947946504684, 22.74464104630977, 24.846997610873178], 
"eval_len": [30, 30, 42, 30, 33, 36, 46, 28, 33, 32]}

 87%|████████▋ | 869996/1000000 [9:33:47<1:06:11, 32.73it/s]global step 870000, trans_decision ep_re 122.89303795507396

{"global_step": 870000, "eval_re": [24.145659231503465, 86.67267656664926, 
395.42674626807093, 32.21705551425958, 16.47297721216879, 193.36831470052823, 
395.14400627546803, 18.90329738122796, 24.742639422273506, 41.837006978589926], 
"eval_len": [27, 54, 166, 34, 17, 93, 152, 20, 36, 50]}

 88%|████████▊ | 879999/1000000 [9:40:26<1:00:44, 32.93it/s]global step 880000, trans_decision ep_re 91.77399146179056

{"global_step": 880000, "eval_re": [28.67901686534528, 281.71746047809, 
14.795018046247101, 97.50262387733018, 134.10976713570082, 212.63451257312624, 
14.24310233024242, 104.18348114500368, 12.225110302930808, 17.649821863888928], 
"eval_len": [30, 135, 19, 70, 81, 120, 18, 60, 27, 18]}

 89%|████████▉ | 889999/1000000 [9:47:04<55:58, 32.75it/s]global step 890000, trans_decision ep_re 190.19753193544219

{"global_step": 890000, "eval_re": [153.41406750741874, 11.588150367533757, 
321.3048991092714, 130.77659672910485, 437.57721528387754, 361.01897414336673, 
164.7898665698912, 115.18844656150561, 147.5695602119854, 58.74754287046656], 
"eval_len": [86, 19, 132, 83, 169, 174, 84, 73, 94, 36]}

 90%|████████▉ | 899998/1000000 [9:53:44<50:45, 32.83it/s]global step 900000, trans_decision ep_re 65.18024930831855

{"global_step": 900000, "eval_re": [112.92726875487865, 171.91830460205193, 
11.35935835111681, 38.52711241921022, 14.613904919261637, 49.84798379871216, 
54.47109257098722, 45.14676098003001, 117.54210293074385, 35.44860375619309], 
"eval_len": [78, 90, 15, 39, 18, 51, 37, 44, 72, 49]}

 91%|█████████ | 909998/1000000 [10:00:21<45:42, 32.82it/s]global step 910000, trans_decision ep_re 102.27060746088918

{"global_step": 910000, "eval_re": [29.129165002426284, 26.580734679517143, 
208.95395481958616, 188.45035090207006, 34.5814852440138, 9.11205497817825, 
248.3423192717659, 146.8219229058679, 32.45456034692367, 98.27952645854268], 
"eval_len": [30, 34, 104, 85, 49, 13, 122, 75, 33, 54]}

 92%|█████████▏| 919997/1000000 [10:07:10<41:18, 32.27it/s]global step 920000, trans_decision ep_re 86.08186525879455

{"global_step": 920000, "eval_re": [89.77304341153948, 38.36753780982537, 
71.83371463099411, 62.92323927818207, 139.67361778728394, 17.471470457358723, 
207.38888120299183, 11.776310230992294, 178.69377880769235, 42.917058971085446],
"eval_len": [69, 32, 64, 44, 77, 17, 93, 19, 90, 42]}

 93%|█████████▎| 929999/1000000 [10:13:38<35:29, 32.87it/s]global step 930000, trans_decision ep_re 112.64595730538308

{"global_step": 930000, "eval_re": [208.18627501905505, 62.46862888631024, 
12.057384725230945, 159.11776437703008, 50.50925343915464, 174.0466385582634, 
51.15963771749249, 107.19645356950976, 290.59478572011943, 11.122751041664822], 
"eval_len": [104, 53, 13, 83, 40, 93, 49, 87, 119, 14]}

 94%|█████████▍| 939998/1000000 [10:20:16<30:27, 32.84it/s]global step 940000, trans_decision ep_re 140.27524818228702

{"global_step": 940000, "eval_re": [52.99303780673579, 162.33304911176714, 
350.0070736154499, 185.40590134206084, 113.35464303915927, 135.2826569272733, 
11.742388320760167, 174.11312456441374, 133.07643598807445, 84.44417110717532], 
"eval_len": [61, 82, 138, 102, 63, 80, 15, 80, 78, 64]}

 95%|█████████▍| 949997/1000000 [10:26:54<25:23, 32.83it/s]global step 950000, trans_decision ep_re 69.01980356229244

{"global_step": 950000, "eval_re": [117.63418596806825, 26.266014437094526, 
10.147369791418283, 98.8021446094198, 65.79141281725204, 16.40926102971587, 
16.35378018523785, 26.38913657978175, 23.588575869775052, 288.816154335161], 
"eval_len": [72, 38, 18, 72, 51, 23, 22, 30, 28, 136]}

 96%|█████████▌| 959997/1000000 [10:33:31<20:20, 32.78it/s]global step 960000, trans_decision ep_re 80.94120344243304

{"global_step": 960000, "eval_re": [62.323803983689174, 180.84768173867604, 
13.509104235706882, 266.8918371770671, 75.10659073711076, 15.428151483760526, 
99.90640817779767, 13.930649333328514, 14.047124434200866, 67.42068312299283], 
"eval_len": [63, 84, 16, 111, 80, 18, 76, 18, 16, 56]}

 97%|█████████▋| 969997/1000000 [10:40:09<15:11, 32.91it/s]global step 970000, trans_decision ep_re 80.8182347159731

{"global_step": 970000, "eval_re": [65.14588924373746, 13.157813594391962, 
17.217332558154332, 22.671409721614335, 12.161377835180382, 15.541065359487204, 
116.6840513283969, 404.51891649231493, 48.40972142771915, 92.67476959873444], 
"eval_len": [61, 21, 24, 24, 17, 25, 75, 155, 37, 68]}

 98%|█████████▊| 979997/1000000 [10:46:47<10:09, 32.81it/s]global step 980000, trans_decision ep_re 46.46752548133698

{"global_step": 980000, "eval_re": [16.636812249675497, 14.774644846479095, 
11.431835984733269, 181.2345192416362, 12.180959616594789, 14.702827222375026, 
16.07122683797499, 166.79091461205564, 18.979028248658317, 11.872485953186855], 
"eval_len": [24, 16, 17, 97, 15, 18, 22, 92, 26, 16]}

 99%|█████████▉| 989997/1000000 [10:53:24<05:09, 32.36it/s]global step 990000, trans_decision ep_re 60.06893813572932

{"global_step": 990000, "eval_re": [29.80043723558793, 72.11960804097805, 
69.91529439017968, 21.84400809479366, 93.61968784050697, 11.572063895194672, 
38.33542696676432, 184.0342237966715, 61.60577994457227, 17.842851152044087], 
"eval_len": [31, 65, 68, 36, 78, 29, 35, 102, 61, 27]}

100%|█████████▉| 999997/1000000 [11:00:01<00:00, 32.87it/s]global step 1000000, trans_decision ep_re 63.79349658030045

{"global_step": 1000000, "eval_re": [15.11192562351105, 18.39067274062973, 
118.44841005946323, 54.24535522087492, 58.92751390313968, 56.383776196235644, 
49.008142734540954, 13.389718666875027, 148.16366360133023, 105.86578705640412],
"eval_len": [17, 18, 76, 58, 46, 67, 48, 15, 105, 63]}

100%|██████████| 1000000/1000000 [11:00:12<00:00, 25.24it/s]
