
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.15
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [05:10<12:47:17, 21.50it/s]global step 10000, trans_decision ep_re 45.68767272478908

{"global_step": 10000, "eval_re": [17.65856777802754, 102.65212867174596, 
28.197826042437228, 31.52900118610087, 58.392674346804625, 13.064171016558573, 
13.52632460209296, 88.90186017297981, 21.211606117931144, 81.74256731321205], 
"eval_len": [19, 81, 37, 37, 60, 18, 21, 80, 36, 85]}

  2%|▏         | 19997/1000000 [15:50<12:31:17, 21.74it/s]global step 20000, trans_decision ep_re 26.04287434299364

{"global_step": 20000, "eval_re": [20.396562094612374, 32.51961111620665, 
16.51475092832279, 20.68120640671036, 19.07700738163054, 19.594444304909963, 
14.762209288679365, 18.475590606586163, 81.74111240735876, 16.666248894919406], 
"eval_len": [21, 34, 20, 24, 22, 22, 22, 29, 47, 26]}

  3%|▎         | 29998/1000000 [26:10<12:15:25, 21.98it/s]global step 30000, trans_decision ep_re 48.66519166532621

{"global_step": 30000, "eval_re": [67.15685081958692, 78.51972093501617, 
108.0043924671335, 19.1213986534492, 17.11978463701081, 15.863288279900367, 
101.17151127282969, 34.21486360232149, 30.01197846413539, 15.468127521878637], 
"eval_len": [56, 59, 80, 32, 20, 22, 69, 32, 27, 33]}

  4%|▍         | 39997/1000000 [36:30<12:24:36, 21.49it/s]global step 40000, trans_decision ep_re 67.94335625352416

{"global_step": 40000, "eval_re": [27.211689637913206, 113.73441132438239, 
115.27693397959952, 22.02896357492414, 79.22375872906281, 15.505374198328301, 
107.6469450723704, 91.89699636152186, 88.40461172446561, 18.503877932673433], 
"eval_len": [27, 69, 63, 26, 87, 19, 74, 66, 66, 25]}

  5%|▍         | 49999/1000000 [46:50<12:19:08, 21.42it/s]global step 50000, trans_decision ep_re 57.700074453587774

{"global_step": 50000, "eval_re": [84.21080557803087, 16.720400732969757, 
224.14509761625374, 16.78742699023991, 76.82112299403504, 19.081173598042763, 
93.17917916952534, 13.721610586509811, 19.04448021703965, 13.289447053230747], 
"eval_len": [63, 19, 177, 20, 57, 28, 72, 27, 23, 21]}

  6%|▌         | 59998/1000000 [57:20<11:50:18, 22.06it/s]global step 60000, trans_decision ep_re 40.0066753072548

{"global_step": 60000, "eval_re": [82.77759653202979, 92.88968335828612, 
83.10530677247763, 13.561217408373237, 23.933298039663967, 22.695769797907687, 
23.788238843070776, 11.794712854883354, 25.312039920357684, 20.208889545497755],
"eval_len": [52, 74, 53, 16, 31, 32, 28, 14, 23, 26]}

  7%|▋         | 69997/1000000 [1:07:40<12:04:58, 21.38it/s]global step 70000, trans_decision ep_re 39.77009386582134

{"global_step": 70000, "eval_re": [12.389621136658516, 19.424660267172413, 
36.59285496283048, 88.9134221341076, 84.68533571534553, 20.80977535892655, 
85.09232202292166, 17.687036307008512, 15.026295505652685, 17.079615247589416], 
"eval_len": [18, 21, 27, 54, 61, 26, 61, 26, 22, 26]}

  8%|▊         | 79999/1000000 [1:18:00<11:52:36, 21.52it/s]global step 80000, trans_decision ep_re 31.767705548881658

{"global_step": 80000, "eval_re": [14.310316674908776, 14.933266149316832, 
12.998336732993264, 13.951161043563335, 88.52648069237624, 16.33806146048333, 
98.60776960482349, 28.757571421542718, 14.47328248641569, 14.780809222392843], 
"eval_len": [19, 20, 14, 17, 66, 25, 80, 33, 24, 26]}

  9%|▉         | 89997/1000000 [1:28:20<11:35:56, 21.79it/s]global step 90000, trans_decision ep_re 67.16912590581737

{"global_step": 90000, "eval_re": [14.752531835966863, 160.91239080882187, 
16.48501304644615, 102.33200924296136, 115.07901970170964, 115.59188416637951, 
16.0614386548983, 92.08816189676406, 21.6127889535626, 16.776020750663427], 
"eval_len": [32, 109, 22, 97, 95, 97, 20, 71, 22, 25]}

 10%|▉         | 99998/1000000 [1:38:21<11:21:46, 22.00it/s]global step 100000, trans_decision ep_re 27.528399054361575

{"global_step": 100000, "eval_re": [12.8168371403986, 28.324563064629277, 
16.592267508574963, 12.386948356427046, 120.83789890324483, 18.664081021243568, 
19.976556562510115, 14.955384982080263, 10.673728649249114, 20.055724355257976],
"eval_len": [19, 32, 20, 17, 105, 25, 28, 29, 19, 24]}

 11%|█         | 109999/1000000 [1:48:50<11:26:11, 21.62it/s]global step 110000, trans_decision ep_re 48.81027718600041

{"global_step": 110000, "eval_re": [73.91858960312734, 15.360781329068686, 
132.3777237415655, 19.45890463695553, 123.31640969857666, 19.231885850359316, 
12.47412629026432, 14.006695253259695, 35.24508013722202, 42.71257531960498], 
"eval_len": [62, 22, 93, 31, 101, 20, 30, 15, 36, 51]}

 12%|█▏        | 119999/1000000 [1:59:10<11:16:34, 21.68it/s]global step 120000, trans_decision ep_re 32.72727761770926

{"global_step": 120000, "eval_re": [24.252988622740826, 18.742794496492, 
19.81890242410493, 23.122448866101458, 124.50144257864225, 30.947232681529538, 
26.815233509765832, 14.861469544749719, 16.632918757302736, 27.57734469566332], 
"eval_len": [25, 20, 23, 26, 95, 34, 30, 19, 22, 25]}

 13%|█▎        | 129997/1000000 [2:09:30<11:03:41, 21.85it/s]global step 130000, trans_decision ep_re 28.789001964388724

{"global_step": 130000, "eval_re": [25.444193373907908, 79.42275792880945, 
27.613018359605963, 26.406259507569505, 28.615368217853646, 15.17633448374018, 
25.835762821948368, 21.153577743164462, 21.706034489669317, 16.516712717618393],
"eval_len": [26, 60, 27, 31, 31, 19, 23, 23, 22, 26]}

 14%|█▍        | 139999/1000000 [2:19:40<10:56:51, 21.82it/s]global step 140000, trans_decision ep_re 21.56083383885958

{"global_step": 140000, "eval_re": [11.126344866917904, 28.487242756329792, 
22.378774153234495, 20.814497131310475, 14.440613003190792, 17.903580745859333, 
28.879815828226256, 27.52116831112484, 18.759439798658573, 25.296861793743325], 
"eval_len": [14, 31, 36, 24, 29, 19, 27, 30, 20, 30]}

 15%|█▍        | 149997/1000000 [2:30:00<10:46:45, 21.90it/s]global step 150000, trans_decision ep_re 37.75206390336527

{"global_step": 150000, "eval_re": [26.786385649209617, 17.61363790878657, 
27.934748143940595, 37.703379398671544, 129.28478978620205, 18.266962009172623, 
19.682813450640378, 12.464448902168662, 74.9818087679583, 12.80166501690234], 
"eval_len": [29, 24, 29, 34, 79, 19, 25, 16, 67, 21]}

 16%|█▌        | 159999/1000000 [2:40:10<10:34:03, 22.08it/s]global step 160000, trans_decision ep_re 49.075824477339

{"global_step": 160000, "eval_re": [78.14413921124802, 21.061267576901372, 
16.13648392360684, 17.464471282962457, 10.494438888542776, 17.01026361270962, 
11.841229960966135, 99.39691874097215, 124.93242543333446, 94.27660614214616], 
"eval_len": [63, 27, 22, 29, 13, 24, 17, 69, 65, 68]}

 17%|█▋        | 169999/1000000 [2:50:20<10:32:06, 21.88it/s]global step 170000, trans_decision ep_re 34.27183548213083

{"global_step": 170000, "eval_re": [14.591467971211362, 14.997348619059338, 
19.08118594988539, 15.764876992492052, 22.88464520979961, 169.62843759083907, 
22.3859473201797, 20.370561703718305, 20.565258781173657, 22.44862468294982], 
"eval_len": [19, 24, 26, 21, 33, 112, 21, 25, 25, 34]}

 18%|█▊        | 179997/1000000 [3:00:22<10:18:55, 22.08it/s]global step 180000, trans_decision ep_re 53.38430405157434

{"global_step": 180000, "eval_re": [15.818876124194398, 31.319773610997764, 
83.41060850178972, 16.24530195133592, 22.507465370063155, 23.43191856616804, 
10.184677362320901, 124.65048522684323, 17.993244980354316, 188.28068882167597],
"eval_len": [21, 31, 62, 20, 31, 26, 17, 85, 25, 110]}

 19%|█▉        | 189998/1000000 [3:10:50<10:12:44, 22.03it/s]global step 190000, trans_decision ep_re 65.92288793093807

{"global_step": 190000, "eval_re": [114.03724799181799, 17.528509319011334, 
11.213867780792315, 194.66715757324505, 140.46410146714075, 34.468326255916026, 
9.276151700791912, 108.94592578147116, 12.181465435283165, 16.44612600391114], 
"eval_len": [84, 23, 16, 118, 92, 31, 13, 83, 15, 20]}

 20%|█▉        | 199999/1000000 [3:21:00<10:05:59, 22.00it/s]global step 200000, trans_decision ep_re 41.00519409940728

{"global_step": 200000, "eval_re": [13.5263775709903, 13.669542487598841, 
26.67239102651525, 10.734734638447863, 93.27363728479229, 86.97194307415432, 
118.9484576498818, 13.603576611855756, 18.45879701450293, 14.19248363533341], 
"eval_len": [15, 21, 28, 12, 74, 64, 97, 19, 29, 16]}

 21%|██        | 209999/1000000 [3:31:10<10:01:29, 21.89it/s]global step 210000, trans_decision ep_re 30.529272449594657

{"global_step": 210000, "eval_re": [25.668884232815937, 18.25842444200513, 
12.355226098117909, 21.425319924112074, 17.306391913194563, 131.1756522183718, 
20.27401080274225, 22.336338537067235, 16.209943730807794, 20.282532596711896], 
"eval_len": [33, 23, 34, 26, 22, 109, 32, 28, 24, 37]}

 22%|██▏       | 219999/1000000 [3:41:10<9:54:11, 21.88it/s]global step 220000, trans_decision ep_re 37.001010024871775

{"global_step": 220000, "eval_re": [27.868746548308057, 10.370731035044638, 
12.625364048374836, 104.7083021748666, 18.077468961629503, 30.862813312486853, 
36.761468520746064, 12.964087751377074, 86.41722174010332, 29.353896155780834], 
"eval_len": [24, 17, 16, 69, 25, 31, 60, 18, 64, 37]}

 23%|██▎       | 229999/1000000 [3:51:40<9:41:58, 22.05it/s]global step 230000, trans_decision ep_re 24.943602626012876

{"global_step": 230000, "eval_re": [24.175353631412644, 27.29939167521998, 
18.31225095407924, 19.461071914610482, 21.310094139271964, 39.3262529546027, 
26.535809833648045, 27.412874425403654, 23.503525941845602, 22.099400790034455],
"eval_len": [26, 29, 33, 28, 29, 47, 31, 28, 25, 24]}

 24%|██▍       | 239999/1000000 [4:01:50<9:37:14, 21.94it/s]global step 240000, trans_decision ep_re 63.95355112656038

{"global_step": 240000, "eval_re": [89.0758127605169, 30.155371574926416, 
116.19636310983753, 95.30953901141879, 101.87999950395024, 17.141393581056967, 
26.281797920468396, 19.240723167680752, 130.33342451105787, 13.921086124689865],
"eval_len": [87, 27, 60, 60, 85, 19, 28, 31, 97, 20]}

 25%|██▍       | 249999/1000000 [4:12:00<9:29:41, 21.94it/s]global step 250000, trans_decision ep_re 29.097415411493557

{"global_step": 250000, "eval_re": [14.274571861273445, 30.25950717516497, 
37.243960931162086, 16.127929451437957, 30.837593480257354, 20.580812695676197, 
9.609538653147112, 13.15303385770872, 104.62044612118456, 14.266759887923161], 
"eval_len": [17, 32, 30, 23, 30, 22, 23, 22, 86, 25]}

 26%|██▌       | 259999/1000000 [4:22:10<9:20:37, 22.00it/s]global step 260000, trans_decision ep_re 29.570503354780225

{"global_step": 260000, "eval_re": [15.354515729229758, 25.374793273140515, 
18.571867270036734, 103.01818012475897, 12.727047442786265, 18.769104179977752, 
24.47320770340389, 15.617950364347513, 45.581375892518004, 16.21699156760286], 
"eval_len": [18, 31, 21, 120, 18, 20, 32, 32, 66, 17]}

 27%|██▋       | 269997/1000000 [4:32:12<9:18:41, 21.78it/s]global step 270000, trans_decision ep_re 45.793789201934075

{"global_step": 270000, "eval_re": [21.07791346419985, 111.3544865280288, 
33.91079936531111, 88.38637244121577, 19.59724910120057, 14.802443032438893, 
116.36013783828875, 15.098640644806865, 12.422410403747874, 24.927439200102306],
"eval_len": [21, 75, 39, 81, 18, 15, 68, 19, 16, 29]}

 28%|██▊       | 279998/1000000 [4:42:40<9:02:15, 22.13it/s]global step 280000, trans_decision ep_re 45.01394388097222

{"global_step": 280000, "eval_re": [18.220405280710608, 13.458893069419927, 
15.95746080716171, 26.476867734373705, 18.065741433643336, 154.14261243289312, 
17.910149324699848, 17.465267828459417, 136.5674685404456, 31.874572357914992], 
"eval_len": [20, 18, 27, 33, 25, 94, 29, 20, 74, 35]}

 29%|██▉       | 289998/1000000 [4:52:50<8:54:24, 22.14it/s]global step 290000, trans_decision ep_re 62.20373522952944

{"global_step": 290000, "eval_re": [18.347893533251337, 94.7634820638355, 
17.042761449821374, 25.926213777383165, 98.97753284208925, 11.05195514893546, 
82.59146450161762, 18.90671405726887, 102.31483714268313, 152.11449777840875], 
"eval_len": [26, 61, 28, 31, 62, 14, 64, 26, 77, 92]}

 30%|██▉       | 299999/1000000 [5:03:00<8:49:31, 22.03it/s]global step 300000, trans_decision ep_re 35.25076240609384

{"global_step": 300000, "eval_re": [16.633839425446812, 13.447340919629314, 
32.51613203278108, 24.829732336387842, 29.459607412935306, 21.73711498888634, 
168.7500680194231, 10.65093648448472, 20.669805413809545, 13.813047027154315], 
"eval_len": [23, 28, 33, 30, 26, 28, 80, 13, 25, 18]}

 31%|███       | 309999/1000000 [5:13:10<8:39:13, 22.15it/s]global step 310000, trans_decision ep_re 64.48155493647477

{"global_step": 310000, "eval_re": [22.681851093313014, 10.031390030674082, 
26.688316733237478, 15.503811290010447, 105.22375367347223, 110.43176469396222, 
20.79814145722262, 22.292649448946072, 169.5955147902423, 141.56835615366717], 
"eval_len": [23, 16, 23, 21, 82, 82, 23, 23, 112, 91]}

 32%|███▏      | 319999/1000000 [5:23:00<8:31:33, 22.15it/s]global step 320000, trans_decision ep_re 64.88430865957203

{"global_step": 320000, "eval_re": [104.83242288485202, 20.21437223221232, 
30.063676981670078, 13.810399285395905, 16.731867704770092, 191.63524011486734, 
144.29951436312635, 13.44429911171225, 95.76746010187661, 18.043833815237292], 
"eval_len": [98, 22, 30, 19, 26, 118, 117, 19, 66, 29]}

 33%|███▎      | 329999/1000000 [5:33:20<8:26:50, 22.03it/s]global step 330000, trans_decision ep_re 85.49312083946685

{"global_step": 330000, "eval_re": [126.96873291124923, 9.449762251755352, 
34.586067879573136, 9.087741012459613, 204.65282397743584, 18.436828538813785, 
252.59658995878863, 25.154170529436705, 40.74481473216665, 133.25367660298969], 
"eval_len": [84, 12, 39, 16, 123, 25, 146, 34, 39, 81]}

 34%|███▍      | 339999/1000000 [5:43:30<8:16:37, 22.15it/s]global step 340000, trans_decision ep_re 46.09772974440941

{"global_step": 340000, "eval_re": [27.859194140917957, 58.91842205095089, 
108.02866710780643, 15.15593956397441, 34.546925395692014, 32.57452002431625, 
39.74175226439706, 100.60831899779706, 10.359455093798426, 33.184102804443626], 
"eval_len": [39, 39, 89, 16, 40, 27, 40, 59, 14, 33]}

 35%|███▍      | 349998/1000000 [5:53:40<8:05:35, 22.31it/s]global step 350000, trans_decision ep_re 50.16811632412475

{"global_step": 350000, "eval_re": [65.37356032307214, 13.870425418139842, 
16.983218825062792, 21.107411107732464, 18.24514926149502, 20.66647783064159, 
140.9010810672366, 15.781092120013657, 121.9102361712133, 66.84251111664015], 
"eval_len": [55, 17, 21, 30, 22, 27, 85, 19, 80, 43]}

 36%|███▌      | 359999/1000000 [6:03:32<8:05:15, 21.98it/s]global step 360000, trans_decision ep_re 38.24485354874902

{"global_step": 360000, "eval_re": [29.54395492493769, 11.405857863782206, 
197.74827065557292, 16.27073301384741, 24.866575524924283, 27.467759686041127, 
20.805261143351846, 9.448888955277614, 31.763447306181305, 13.127786413573793], 
"eval_len": [33, 14, 98, 20, 30, 25, 31, 38, 33, 19]}

 37%|███▋      | 369999/1000000 [6:13:50<7:55:51, 22.07it/s]global step 370000, trans_decision ep_re 54.34033609964642

{"global_step": 370000, "eval_re": [19.81105877814959, 87.58331274724827, 
20.620634683983084, 17.38640942879607, 20.658568626793798, 12.982116515410429, 
23.039119769299557, 23.423561536247014, 296.0564259132378, 21.84215299729867], 
"eval_len": [32, 69, 25, 17, 27, 22, 23, 22, 163, 25]}

 38%|███▊      | 379999/1000000 [6:24:00<7:47:40, 22.10it/s]global step 380000, trans_decision ep_re 29.757562201459972

{"global_step": 380000, "eval_re": [35.455708601300024, 27.640100414196727, 
64.20509894819594, 79.72892056974062, 11.498130696964274, 16.763982400329883, 
22.36205209272388, 12.77050546886125, 13.53460440936742, 13.616518412919662], 
"eval_len": [38, 26, 56, 54, 19, 20, 23, 16, 15, 21]}

 39%|███▉      | 389997/1000000 [6:34:10<7:39:37, 22.12it/s]global step 390000, trans_decision ep_re 20.321136317003713

{"global_step": 390000, "eval_re": [22.946998070674013, 29.62517978565473, 
16.169288794228187, 13.395747279371143, 35.19158353607715, 22.960142175941446, 
12.230594065593225, 19.53385114126449, 15.329604105293793, 15.82837421593897], 
"eval_len": [25, 26, 27, 26, 33, 31, 24, 21, 20, 19]}

 40%|███▉      | 399999/1000000 [6:44:00<7:33:46, 22.04it/s]global step 400000, trans_decision ep_re 55.76171316797148

{"global_step": 400000, "eval_re": [190.67134070303712, 74.79376927535587, 
94.38220043052294, 30.586343356795076, 18.771560902052265, 23.630161844828702, 
18.59519402722678, 24.045972321862664, 67.97153525701886, 14.169053561014476], 
"eval_len": [120, 82, 92, 29, 20, 29, 24, 23, 72, 15]}

 41%|████      | 409999/1000000 [6:54:20<7:20:43, 22.31it/s]global step 410000, trans_decision ep_re 23.277544481746258

{"global_step": 410000, "eval_re": [17.861864383693568, 14.24246159954906, 
18.41854000910727, 25.29012494546159, 15.093168781618049, 11.454882446529423, 
75.61703551798398, 19.20967111244259, 12.805880692657958, 22.781815328419114], 
"eval_len": [19, 18, 21, 26, 22, 16, 81, 21, 17, 27]}

 42%|████▏     | 419997/1000000 [7:04:30<7:14:44, 22.24it/s]global step 420000, trans_decision ep_re 35.07392324954809

{"global_step": 420000, "eval_re": [13.017556141754449, 19.24042576876364, 
25.293752523670936, 24.929032873253036, 13.205660845399558, 22.49575670169599, 
116.48061625872914, 91.36956078689389, 10.456036748065241, 14.250833847255027], 
"eval_len": [17, 23, 34, 25, 24, 20, 96, 59, 19, 19]}

 43%|████▎     | 429999/1000000 [7:14:22<7:08:00, 22.20it/s]global step 430000, trans_decision ep_re 55.212214424223205

{"global_step": 430000, "eval_re": [197.45800293387666, 18.96565424774603, 
30.274169321985777, 22.055257521796275, 91.93045364655055, 19.81110516214555, 
80.57812950401312, 47.04738586001044, 22.017715868768892, 21.984270175338835], 
"eval_len": [124, 29, 32, 22, 70, 32, 55, 49, 20, 27]}

 44%|████▍     | 439999/1000000 [7:24:40<7:03:36, 22.03it/s]global step 440000, trans_decision ep_re 41.62192566045913

{"global_step": 440000, "eval_re": [13.913815631893216, 28.43811344601661, 
19.126335364581173, 18.03368709037971, 37.1096198042456, 18.496837015725678, 
12.048286755875422, 13.694762586222597, 206.70049796388136, 48.65730094576984], 
"eval_len": [18, 31, 26, 26, 38, 23, 16, 20, 95, 49]}

 45%|████▍     | 449999/1000000 [7:34:50<6:54:39, 22.11it/s]global step 450000, trans_decision ep_re 30.92448960499926

{"global_step": 450000, "eval_re": [18.478924446449284, 22.524535260675812, 
15.608231255944917, 76.80345001383981, 23.111166718671985, 15.707384088613471, 
30.15530836545138, 18.575562757734296, 14.25154410129055, 74.02878904132108], 
"eval_len": [19, 29, 18, 81, 25, 19, 28, 22, 17, 49]}

 46%|████▌     | 459999/1000000 [7:45:00<6:46:43, 22.13it/s]global step 460000, trans_decision ep_re 40.018506460271105

{"global_step": 460000, "eval_re": [83.77824438509795, 21.582251409889995, 
12.672274325226955, 12.300740052688553, 123.70582291472283, 21.064475224624154, 
87.44655162858402, 10.840439116278251, 11.93633224435951, 14.857933301238832], 
"eval_len": [59, 28, 15, 17, 75, 28, 72, 20, 17, 34]}

 47%|████▋     | 469997/1000000 [7:54:52<6:41:21, 22.01it/s]global step 470000, trans_decision ep_re 48.366748312933254

{"global_step": 470000, "eval_re": [32.651042981347814, 17.83059800363785, 
16.917767367228617, 155.6499972555091, 130.80242372849585, 76.10298169712306, 
11.470097795881271, 12.703142866219812, 14.00895199547294, 15.530479438416226], 
"eval_len": [30, 26, 23, 95, 88, 59, 16, 15, 25, 21]}

 48%|████▊     | 479999/1000000 [8:05:10<6:31:21, 22.15it/s]global step 480000, trans_decision ep_re 46.58912550542999

{"global_step": 480000, "eval_re": [39.421427014001, 16.108051873653483, 
85.39825243563551, 90.92068842701552, 111.27688661480366, 29.92302774541465, 
27.61228899052298, 14.09188348064122, 25.87565725099812, 25.263091221613728], 
"eval_len": [37, 19, 85, 61, 84, 43, 28, 16, 27, 40]}

 49%|████▉     | 489999/1000000 [8:15:20<6:20:52, 22.32it/s]global step 490000, trans_decision ep_re 41.696752255500925

{"global_step": 490000, "eval_re": [12.00918849548867, 24.597637715936568, 
39.31200528392552, 10.93953233034085, 30.651231985271654, 15.810139753535639, 
15.94225977163503, 137.14233400210261, 43.59879321351248, 86.96440000326028], 
"eval_len": [18, 24, 45, 13, 30, 25, 18, 93, 42, 66]}

 50%|████▉     | 499997/1000000 [8:25:30<6:18:08, 22.04it/s]global step 500000, trans_decision ep_re 86.52405006516706

{"global_step": 500000, "eval_re": [181.1243665261919, 12.784069184358128, 
16.847804137517112, 81.9973934413037, 80.72915796529841, 25.08913874510895, 
18.826301521585428, 227.88161266388042, 17.660028150578714, 202.30062831584794],
"eval_len": [97, 15, 22, 56, 87, 30, 24, 137, 31, 108]}

 51%|█████     | 509997/1000000 [8:35:22<6:07:33, 22.22it/s]global step 510000, trans_decision ep_re 49.181914886732336

{"global_step": 510000, "eval_re": [12.304651200302894, 227.0551353095681, 
13.733994480278342, 33.65689393599985, 17.13667350475503, 20.084834553131614, 
20.225612148669207, 25.91904833264722, 109.22561967923045, 12.476685722740685], 
"eval_len": [21, 132, 23, 32, 20, 24, 23, 30, 89, 18]}

 52%|█████▏    | 519999/1000000 [8:45:40<6:02:42, 22.06it/s]global step 520000, trans_decision ep_re 42.141965266321414

{"global_step": 520000, "eval_re": [13.473769870111305, 17.34538075762462, 
15.805516510976739, 124.94964229679638, 96.18089426500335, 15.613866754158327, 
18.252585564418023, 26.74861833176983, 12.658341989137496, 80.39103632321807], 
"eval_len": [16, 20, 28, 88, 72, 22, 20, 28, 15, 88]}

 53%|█████▎    | 529999/1000000 [8:55:50<5:53:43, 22.15it/s]global step 530000, trans_decision ep_re 40.07704992460238

{"global_step": 530000, "eval_re": [10.036374989585479, 27.845570164608088, 
41.395805291062445, 14.325106919561133, 95.69551545830885, 102.95901966399863, 
46.79657842458671, 12.088725681447828, 24.824027568921586, 24.80377508394309], 
"eval_len": [14, 27, 44, 16, 85, 82, 46, 18, 32, 31]}

 54%|█████▍    | 539997/1000000 [9:06:00<5:47:17, 22.08it/s]global step 540000, trans_decision ep_re 48.32466325933644

{"global_step": 540000, "eval_re": [83.57564643136257, 131.01527489378788, 
20.436463502662814, 14.133589320443022, 124.03022891908996, 41.03628435120414, 
19.959631928014844, 21.301247166212573, 11.534971276977755, 16.223294803608834],
"eval_len": [91, 99, 34, 18, 77, 69, 20, 25, 15, 21]}

 55%|█████▍    | 549999/1000000 [9:15:52<5:39:04, 22.12it/s]global step 550000, trans_decision ep_re 68.68925093748183

{"global_step": 550000, "eval_re": [13.019993166830911, 69.10675513391061, 
128.62152840329568, 147.8332011240467, 146.93666566914746, 23.575457422382335, 
26.619389772951326, 99.20362288675524, 17.562097570201967, 14.413798225295942], 
"eval_len": [27, 54, 95, 98, 108, 28, 31, 86, 23, 18]}

 56%|█████▌    | 559999/1000000 [9:26:10<5:30:18, 22.20it/s]global step 560000, trans_decision ep_re 25.43252500574811

{"global_step": 560000, "eval_re": [11.843783208426299, 82.81277647914003, 
11.245386758756133, 14.745666713261597, 34.598211324745094, 26.634330222077242, 
26.80930062019445, 18.11632781543212, 13.537114540428927, 13.982352375019216], 
"eval_len": [15, 57, 13, 23, 32, 31, 24, 19, 16, 17]}

 57%|█████▋    | 569999/1000000 [9:36:20<5:24:01, 22.12it/s]global step 570000, trans_decision ep_re 50.69633737935904

{"global_step": 570000, "eval_re": [84.1978285422325, 15.721681482569267, 
15.52013983344844, 121.92940997221534, 15.394146688079344, 23.114435679429715, 
25.65470130350687, 108.93110158724734, 75.62528365410198, 20.874645050759703], 
"eval_len": [61, 25, 20, 83, 27, 28, 28, 82, 58, 25]}

 58%|█████▊    | 579999/1000000 [9:46:30<5:15:52, 22.16it/s]global step 580000, trans_decision ep_re 61.395873483595764

{"global_step": 580000, "eval_re": [14.524465122725685, 82.62717614449386, 
16.182678693345395, 23.729125375364404, 20.347482386969915, 116.45522209329623, 
77.86982349484194, 33.79504711277642, 118.8610332044239, 109.56668120771994], 
"eval_len": [23, 66, 17, 31, 24, 85, 71, 42, 92, 105]}

 59%|█████▉    | 589998/1000000 [9:56:40<5:06:32, 22.29it/s]global step 590000, trans_decision ep_re 73.12340497991076

{"global_step": 590000, "eval_re": [177.9146827362703, 107.02740017539232, 
118.41444559181637, 11.59517911476293, 76.13631875521078, 30.29326021902457, 
15.35525455190657, 150.15600524910855, 30.7930800422482, 13.548423363366851], 
"eval_len": [122, 62, 80, 14, 53, 28, 21, 94, 32, 20]}

 60%|█████▉    | 599998/1000000 [10:06:31<4:58:10, 22.36it/s]global step 600000, trans_decision ep_re 52.42036717006731

{"global_step": 600000, "eval_re": [25.561929929652955, 76.65819041019394, 
10.80453916820789, 11.975285616498482, 18.1401119884887, 13.009377155862651, 
14.921735867472401, 293.5224565879731, 39.619713813736766, 19.990331162586177], 
"eval_len": [27, 49, 16, 19, 22, 15, 18, 148, 38, 28]}

 61%|██████    | 609999/1000000 [10:16:50<4:54:16, 22.09it/s]global step 610000, trans_decision ep_re 60.77295758391276

{"global_step": 610000, "eval_re": [28.300128923643857, 9.925560051698566, 
15.663720652130207, 34.893158195277984, 23.893520292356424, 350.89903484667656, 
77.09098486529913, 20.090096766662796, 33.86671313409209, 13.106658111289997], 
"eval_len": [27, 13, 20, 38, 34, 150, 54, 24, 35, 15]}

 62%|██████▏   | 619999/1000000 [10:27:00<4:46:31, 22.10it/s]global step 620000, trans_decision ep_re 42.214757492632785

{"global_step": 620000, "eval_re": [24.152381781790716, 16.171557767876454, 
27.861990374080523, 81.12732350093869, 22.10069187161377, 55.25688500074409, 
20.1788654925056, 132.60194557246837, 11.605386305690365, 31.090547258619207], 
"eval_len": [29, 18, 29, 56, 28, 64, 22, 81, 15, 30]}

 63%|██████▎   | 629998/1000000 [10:36:53<4:34:28, 22.47it/s]global step 630000, trans_decision ep_re 42.770359712009736

{"global_step": 630000, "eval_re": [79.84759367165243, 20.07544495707481, 
36.848598405576425, 11.004904392474682, 168.72177747427506, 30.107264472864564, 
15.308421344364966, 34.497629798276066, 12.407014228799436, 18.884948374738958],
"eval_len": [62, 34, 30, 19, 96, 28, 20, 39, 16, 38]}

 64%|██████▍   | 639999/1000000 [10:47:10<4:29:27, 22.27it/s]global step 640000, trans_decision ep_re 26.870651945024083

{"global_step": 640000, "eval_re": [15.521444872318002, 13.888509209277506, 
17.45300769427815, 16.428213287229365, 15.68130697455638, 106.67574759483928, 
13.762023892465118, 18.967569332060446, 21.478949316726215, 28.849747276490344],
"eval_len": [18, 18, 23, 28, 27, 81, 21, 38, 23, 32]}

 65%|██████▍   | 649999/1000000 [10:57:20<4:23:22, 22.15it/s]global step 650000, trans_decision ep_re 76.25645177342489

{"global_step": 650000, "eval_re": [83.41285998522059, 28.1249758159436, 
315.3348676390382, 20.468090302491202, 18.99381497112931, 30.444140166730236, 
114.32468732120044, 22.380632783376274, 110.67643146036646, 18.404017288752605],
"eval_len": [57, 30, 140, 21, 20, 31, 74, 29, 88, 33]}

 66%|██████▌   | 659999/1000000 [11:07:30<4:15:59, 22.14it/s]global step 660000, trans_decision ep_re 66.72855616958952

{"global_step": 660000, "eval_re": [111.43207271691888, 22.261912699225945, 
32.56462821775239, 88.00051031859014, 19.26261290613077, 25.295648635311142, 
136.72916383604374, 11.772216450639343, 124.52862200488681, 95.43817391039603], 
"eval_len": [103, 24, 32, 62, 28, 33, 98, 14, 92, 78]}

 67%|██████▋   | 669999/1000000 [11:17:40<4:08:09, 22.16it/s]global step 670000, trans_decision ep_re 45.74903636971497

{"global_step": 670000, "eval_re": [18.629615870497794, 26.112645552127848, 
19.935177751698525, 12.441769752042388, 31.938540917098003, 33.33462681389004, 
222.52471640776042, 24.803495894068163, 24.958872677628552, 42.810902060338], 
"eval_len": [30, 25, 27, 17, 41, 29, 120, 24, 32, 34]}

 68%|██████▊   | 679997/1000000 [11:27:32<4:01:46, 22.06it/s]global step 680000, trans_decision ep_re 16.349621659989268

{"global_step": 680000, "eval_re": [16.82213065060312, 10.818969291841576, 
25.152073000272612, 11.915338908244498, 8.64220647486663, 25.190274486166995, 
19.636115659944764, 14.836821387942349, 18.778993878283373, 11.703292861726803],
"eval_len": [39, 14, 41, 15, 17, 26, 31, 21, 25, 19]}

 69%|██████▉   | 689999/1000000 [11:37:50<3:53:19, 22.14it/s]global step 690000, trans_decision ep_re 56.885754517774856

{"global_step": 690000, "eval_re": [15.474897153927813, 213.6689236396385, 
18.760278308923937, 28.900966433500002, 24.318132155654645, 104.3353476830149, 
13.774411236560661, 18.32827141827782, 111.23309430476397, 20.06322284348627], 
"eval_len": [20, 157, 25, 29, 27, 81, 16, 20, 79, 29]}

 70%|██████▉   | 699999/1000000 [11:48:00<3:46:43, 22.05it/s]global step 700000, trans_decision ep_re 55.8142853024889

{"global_step": 700000, "eval_re": [15.045182452222654, 19.551523512321108, 
21.717618288656123, 142.97738676397825, 14.512079280630322, 23.260603495190633, 
79.95972322841432, 31.920259919254157, 178.63056425880535, 30.567911825415948], 
"eval_len": [31, 19, 34, 118, 25, 30, 64, 35, 108, 33]}

 71%|███████   | 709999/1000000 [11:58:10<3:38:45, 22.09it/s]global step 710000, trans_decision ep_re 38.36891455000067

{"global_step": 710000, "eval_re": [126.36260198158598, 11.744689174004593, 
17.34873996649353, 19.5536678898083, 11.929333127201236, 15.913160361204831, 
124.4166989885867, 17.300086539433355, 19.37590082468059, 19.744266647007535], 
"eval_len": [87, 20, 20, 29, 33, 22, 104, 19, 19, 26]}

 72%|███████▏  | 719997/1000000 [12:08:20<3:29:30, 22.27it/s]global step 720000, trans_decision ep_re 60.030534211438315

{"global_step": 720000, "eval_re": [163.6417595252894, 25.54445122491353, 
20.69785244030067, 76.3352373568164, 21.04974723442992, 16.398642682461492, 
131.29378758761345, 16.988360383346322, 112.32149847932607, 16.034005199885875],
"eval_len": [101, 30, 30, 63, 20, 23, 101, 21, 75, 25]}

 73%|███████▎  | 729997/1000000 [12:18:12<3:23:54, 22.07it/s]global step 730000, trans_decision ep_re 34.70450336537803

{"global_step": 730000, "eval_re": [21.70008075564187, 18.45105811565318, 
23.554739782556613, 89.71473456699188, 13.986525695319962, 12.954272877574217, 
26.014126863457953, 89.31058167910243, 17.23829811235595, 34.12061520512621], 
"eval_len": [23, 20, 24, 60, 20, 18, 35, 76, 17, 31]}

 74%|███████▍  | 739999/1000000 [12:28:30<3:16:04, 22.10it/s]global step 740000, trans_decision ep_re 26.036452636230358

{"global_step": 740000, "eval_re": [18.576662533731234, 77.44702819878015, 
19.31059013876708, 36.96343965175215, 22.87168929092554, 14.649270365707181, 
26.56660241754653, 19.679863085570776, 11.30082533866049, 12.998555340862493], 
"eval_len": [19, 51, 23, 30, 26, 25, 28, 24, 15, 21]}

 75%|███████▍  | 749999/1000000 [12:38:40<3:08:17, 22.13it/s]global step 750000, trans_decision ep_re 38.73548085286505

{"global_step": 750000, "eval_re": [37.57994332412354, 29.605674211234966, 
81.39711608532443, 44.47816031121658, 94.51190607371102, 15.7670697023689, 
13.46578890385462, 40.54096337646381, 9.99500144308619, 20.013185097266422], 
"eval_len": [47, 26, 109, 51, 62, 19, 17, 34, 14, 22]}

 76%|███████▌  | 759999/1000000 [12:48:50<3:00:56, 22.11it/s]global step 760000, trans_decision ep_re 22.657631297672275

{"global_step": 760000, "eval_re": [20.131624057154347, 51.07021590514762, 
29.38793726650676, 23.27468520156267, 16.54573623818602, 12.578155242298497, 
22.09553584642484, 12.274508784048326, 13.573527703271372, 25.644386732122314], 
"eval_len": [27, 49, 38, 23, 26, 23, 26, 17, 36, 32]}

 77%|███████▋  | 769997/1000000 [12:59:00<2:54:06, 22.02it/s]global step 770000, trans_decision ep_re 24.232470541173196

{"global_step": 770000, "eval_re": [12.422717379823531, 11.153236224195915, 
17.484080746214413, 19.018279195881778, 17.88473658028744, 25.592473198674817, 
65.20580096778224, 41.30153262107349, 18.444843165447743, 13.817005332350613], 
"eval_len": [22, 15, 18, 29, 23, 25, 45, 33, 19, 20]}

 78%|███████▊  | 779997/1000000 [13:08:52<2:48:08, 21.81it/s]global step 780000, trans_decision ep_re 60.95433788913581

{"global_step": 780000, "eval_re": [159.89760007620336, 23.724757729176325, 
31.732262865162557, 23.164165735959752, 11.979163969140918, 97.22013133549707, 
90.35883811586176, 15.189894983467315, 18.672492819746427, 137.6040712611426], 
"eval_len": [78, 29, 32, 36, 19, 72, 73, 25, 32, 90]}

 79%|███████▉  | 789999/1000000 [13:19:01<2:38:27, 22.09it/s]global step 790000, trans_decision ep_re 39.85340708895444

{"global_step": 790000, "eval_re": [26.085278056607276, 44.86749321454024, 
161.67700980350983, 16.71327848647464, 30.72921634955294, 24.363008367631682, 
26.313713762342218, 20.63531084224689, 27.813361128156703, 19.336400878481932], 
"eval_len": [30, 41, 110, 21, 38, 30, 26, 21, 29, 23]}

 80%|███████▉  | 799999/1000000 [13:29:20<2:31:20, 22.02it/s]global step 800000, trans_decision ep_re 57.02732826299548

{"global_step": 800000, "eval_re": [11.133525947225904, 10.969686356448983, 
27.865191924659445, 38.568424403627866, 20.506915855181852, 89.1807977363341, 
12.890437154829144, 308.10955141461085, 25.10761990775307, 25.94113192928364], 
"eval_len": [17, 18, 27, 47, 24, 60, 22, 141, 33, 30]}

 81%|████████  | 809999/1000000 [13:39:30<2:24:10, 21.96it/s]global step 810000, trans_decision ep_re 22.936889404761622

{"global_step": 810000, "eval_re": [24.432506182613917, 18.43463560072027, 
26.903937245638712, 18.569364022572106, 14.116526444770837, 31.63673296364011, 
19.330505019231087, 38.44540711956113, 25.237124089077476, 12.262155359790592], 
"eval_len": [35, 28, 34, 25, 19, 32, 21, 32, 33, 16]}

 82%|████████▏ | 819999/1000000 [13:49:40<2:16:09, 22.03it/s]global step 820000, trans_decision ep_re 58.969403271487394

{"global_step": 820000, "eval_re": [20.16711967028484, 91.70013743777908, 
88.58168753204568, 21.400753064652605, 33.670084440569006, 13.497592788733458, 
203.34390255693683, 85.71256275649816, 21.382890521561155, 10.237301945813122], 
"eval_len": [25, 70, 93, 26, 31, 19, 110, 94, 28, 19]}

 83%|████████▎ | 829999/1000000 [13:59:50<2:08:17, 22.08it/s]global step 830000, trans_decision ep_re 32.23539579092351

{"global_step": 830000, "eval_re": [14.57295713167789, 21.72455981518265, 
17.47405763769583, 87.43757595839797, 9.2546409392012, 17.267690852336088, 
100.07524571245673, 23.351379729939175, 17.014995000489275, 14.180855131858374],
"eval_len": [16, 30, 22, 92, 13, 19, 107, 25, 22, 20]}

 84%|████████▍ | 839999/1000000 [14:10:00<2:00:16, 22.17it/s]global step 840000, trans_decision ep_re 36.26273460871043

{"global_step": 840000, "eval_re": [32.45808907367772, 73.18664741413035, 
47.93620737847858, 31.83180768344743, 18.646166979610527, 29.414426984912406, 
12.35013342906345, 21.066245432579585, 14.42868919706165, 81.30893251414254], 
"eval_len": [36, 48, 47, 29, 30, 27, 16, 31, 15, 56]}

 85%|████████▍ | 849997/1000000 [14:19:53<1:53:38, 22.00it/s]global step 850000, trans_decision ep_re 35.81439104635838

{"global_step": 850000, "eval_re": [15.518729872851495, 22.642219639043326, 
126.33195993291798, 9.716203818840047, 21.022988679903, 22.64252741867345, 
16.238802111580622, 88.02949473949698, 21.47714648774167, 14.523837762535203], 
"eval_len": [25, 29, 74, 12, 33, 27, 17, 67, 20, 20]}

 86%|████████▌ | 859999/1000000 [14:30:01<1:45:42, 22.07it/s]global step 860000, trans_decision ep_re 67.84615936526893

{"global_step": 860000, "eval_re": [9.784874225422294, 38.54848493926361, 
132.74198266160934, 19.287043919410547, 43.07554470437832, 137.0691821859562, 
143.06474891001605, 16.059787737427946, 121.84887822763112, 16.981066141573926],
"eval_len": [16, 37, 83, 30, 40, 80, 93, 21, 87, 18]}

 87%|████████▋ | 869999/1000000 [14:40:20<1:37:54, 22.13it/s]global step 870000, trans_decision ep_re 40.08784405061056

{"global_step": 870000, "eval_re": [12.199207559061666, 38.225875339304764, 
15.627698206774772, 173.09713265274442, 17.361209037049225, 25.705779710469635, 
26.94324252781779, 55.192211001187594, 21.464090494280036, 15.061993977415765], 
"eval_len": [16, 48, 21, 100, 25, 40, 33, 50, 33, 22]}

 88%|████████▊ | 879999/1000000 [14:50:30<1:30:02, 22.21it/s]global step 880000, trans_decision ep_re 44.63467297139889

{"global_step": 880000, "eval_re": [28.57325362045325, 37.448839813275434, 
110.46550955651712, 22.47345405202549, 18.7428445015813, 30.57572125007107, 
15.728529954189533, 135.19527660343346, 26.28473392967063, 20.85856643277162], 
"eval_len": [34, 34, 70, 33, 20, 27, 33, 97, 31, 44]}

 89%|████████▉ | 889999/1000000 [15:00:40<1:23:13, 22.03it/s]global step 890000, trans_decision ep_re 29.492490125944006

{"global_step": 890000, "eval_re": [14.819862686623546, 24.55407600370884, 
11.534553675677039, 14.99028565252251, 22.15804883324031, 12.910417537132354, 
19.930989268322083, 19.531418535019785, 11.56992115583512, 142.92532791135847], 
"eval_len": [19, 22, 22, 17, 28, 17, 28, 28, 19, 86]}

 90%|████████▉ | 899997/1000000 [15:10:34<1:15:27, 22.09it/s]global step 900000, trans_decision ep_re 33.21818250657388

{"global_step": 900000, "eval_re": [14.965596042000344, 11.21956436911634, 
11.60419401607911, 105.86153123463035, 96.4851792277285, 15.854436151860849, 
28.606437275229062, 11.92302673610241, 15.398384622254058, 20.263475390737774], 
"eval_len": [20, 15, 13, 82, 70, 22, 29, 14, 21, 20]}

 91%|█████████ | 909999/1000000 [15:20:41<1:07:57, 22.07it/s]global step 910000, trans_decision ep_re 39.238802503600176

{"global_step": 910000, "eval_re": [22.190755923415388, 26.231283427412762, 
81.32124225761723, 90.27325254713104, 15.079355509501475, 10.251241470791316, 
28.69973507585323, 28.343997456603912, 14.474994838378588, 75.52216652929683], 
"eval_len": [31, 25, 55, 62, 18, 14, 34, 30, 19, 52]}

 92%|█████████▏| 919999/1000000 [15:31:01<1:00:22, 22.09it/s]global step 920000, trans_decision ep_re 42.09162213835144

{"global_step": 920000, "eval_re": [17.62178728754028, 13.593559969210462, 
72.96285249939041, 48.38742048242068, 14.950546723357764, 30.126906518003782, 
138.51503549236548, 21.66337456480745, 49.2015596732608, 13.893178173157334], 
"eval_len": [27, 33, 74, 46, 21, 37, 96, 26, 44, 18]}

 93%|█████████▎| 929999/1000000 [15:41:11<52:21, 22.28it/s]global step 930000, trans_decision ep_re 47.455365805320845

{"global_step": 930000, "eval_re": [29.38616203561656, 18.40033151188662, 
13.153732725908544, 145.73488041403067, 23.30855619842682, 16.412462863222707, 
160.90906266394694, 32.314810446569965, 13.058149391582901, 21.87550980201673], 
"eval_len": [34, 19, 18, 116, 27, 30, 92, 31, 19, 21]}

 94%|█████████▍| 939999/1000000 [15:51:02<45:13, 22.12it/s]global step 940000, trans_decision ep_re 52.63975444965263

{"global_step": 940000, "eval_re": [20.76991420744483, 202.71369134658045, 
10.554600313263823, 90.9654577434694, 15.617059702127579, 103.2347153062794, 
17.513746050496447, 16.19062263702584, 29.268051403057946, 19.569685786780582], 
"eval_len": [23, 112, 13, 66, 19, 70, 28, 27, 27, 21]}

 95%|█████████▍| 949999/1000000 [16:01:21<37:44, 22.08it/s]global step 950000, trans_decision ep_re 38.83919697819771

{"global_step": 950000, "eval_re": [13.38673816604062, 19.488857316501388, 
16.80842673956285, 26.809193780075272, 12.887486072647606, 162.88861396910315, 
18.756947461357832, 18.343603846107527, 80.23821498821522, 18.783887442365714], 
"eval_len": [21, 28, 23, 26, 17, 125, 24, 18, 61, 20]}

 96%|█████████▌| 959999/1000000 [16:11:31<30:25, 21.92it/s]global step 960000, trans_decision ep_re 76.997389314418

{"global_step": 960000, "eval_re": [18.61984565473382, 15.849796848382974, 
233.64855757378348, 114.61663139848416, 20.881013323476473, 220.1297854146022, 
35.312376814797574, 82.44008881146003, 15.604978563961776, 12.870818740497455], 
"eval_len": [24, 19, 113, 87, 20, 131, 35, 61, 22, 29]}

 97%|█████████▋| 969999/1000000 [16:21:41<22:38, 22.09it/s]global step 970000, trans_decision ep_re 25.54008172619476

{"global_step": 970000, "eval_re": [37.337639152241685, 24.137400072537556, 
21.61350361334056, 15.599610751405729, 16.81256585766963, 16.190576941298502, 
34.317836109671966, 40.6689015401073, 23.194185486688095, 25.52859773698656], 
"eval_len": [34, 24, 29, 17, 33, 22, 31, 31, 42, 42]}

 98%|█████████▊| 979998/1000000 [16:31:31<14:54, 22.36it/s]global step 980000, trans_decision ep_re 43.79603154047456

{"global_step": 980000, "eval_re": [18.082903450878522, 17.66486776180341, 
147.37437171024266, 26.532793702664506, 28.05915516744005, 11.739017667116919, 
12.408506637173469, 17.523098474678584, 29.206617547750735, 129.36898328499677],
"eval_len": [21, 19, 121, 28, 30, 17, 16, 17, 29, 102]}

 99%|█████████▉| 989999/1000000 [16:41:51<07:32, 22.12it/s]global step 990000, trans_decision ep_re 39.94078676729632

{"global_step": 990000, "eval_re": [17.502280210081054, 15.380804555599, 
14.41990676113318, 23.649612582975905, 32.45900415983526, 19.74567588335388, 
22.759396828140407, 105.79003082794819, 129.01328904482642, 18.68786681906996], 
"eval_len": [18, 21, 18, 33, 33, 34, 32, 81, 94, 26]}

100%|█████████▉| 999999/1000000 [16:52:01<00:00, 22.16it/s]global step 1000000, trans_decision ep_re 83.9246869678665

{"global_step": 1000000, "eval_re": [132.19643976020387, 115.05409528568735, 
281.5604216679889, 29.794421846642592, 13.53098127850668, 107.86685411643491, 
20.253934003593486, 13.789258227573164, 106.0908129665106, 19.109650525523577], 
"eval_len": [85, 70, 164, 57, 18, 65, 27, 22, 88, 25]}

100%|██████████| 1000000/1000000 [16:52:03<00:00, 16.47it/s]
