
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.1
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9997/1000000 [04:11<10:55:33, 25.17it/s]global step 10000, trans_decision ep_re 173.86306243290625

{"global_step": 10000, "eval_re": [281.02694295632875, 212.536117180179, 
14.197524860875834, 278.63994562743284, 148.92959101028416, 13.803702422599695, 
282.4752093776234, 211.66670062499082, 283.7767856537014, 11.578104615046593], 
"eval_len": [179, 118, 25, 159, 93, 25, 232, 110, 162, 25]}

  2%|▏         | 19997/1000000 [12:52<10:30:48, 25.89it/s]global step 20000, trans_decision ep_re 25.70748504736676

{"global_step": 20000, "eval_re": [7.635334167046087, 128.24004981010455, 
16.1097919577322, 6.6430311285463315, 13.08472056767827, 15.181156672896355, 
9.850498851295615, 16.511069462276392, 23.22534077863315, 20.59385707745859], 
"eval_len": [24, 97, 27, 22, 26, 36, 32, 38, 40, 38]}

  3%|▎         | 29998/1000000 [21:40<10:23:53, 25.91it/s]global step 30000, trans_decision ep_re 76.04652979783954

{"global_step": 30000, "eval_re": [124.50126525094927, 144.67603860144794, 
14.025614631959249, 124.29312527445433, 12.25798866368206, 11.302948272706747, 
152.90515880360687, 13.053690934113611, 8.365368263694988, 155.08409928178028], 
"eval_len": [92, 99, 35, 90, 24, 22, 75, 26, 25, 103]}

  4%|▍         | 39998/1000000 [30:20<10:11:59, 26.14it/s]global step 40000, trans_decision ep_re 125.93127462508642

{"global_step": 40000, "eval_re": [151.97774739015853, 258.2100177951473, 
159.43667557427472, 7.71995806068817, 11.46172619858728, 179.62230431677108, 
261.7519042347044, 13.790516650203084, 8.016344245444266, 207.3255517848854], 
"eval_len": [108, 162, 81, 21, 96, 113, 157, 25, 26, 159]}

  5%|▍         | 49999/1000000 [38:50<10:21:55, 25.46it/s]global step 50000, trans_decision ep_re 31.490078270243934

{"global_step": 50000, "eval_re": [17.63396369816898, 140.0520049293639, 
17.69647337219073, 8.675006571034674, 9.221604653817852, 15.560247124197112, 
18.2739211412265, 8.996439930604625, 61.33819364473635, 17.452927637098643], 
"eval_len": [29, 150, 30, 18, 31, 35, 33, 25, 115, 32]}

  6%|▌         | 59999/1000000 [47:31<10:16:33, 25.41it/s]global step 60000, trans_decision ep_re 22.371662630905

{"global_step": 60000, "eval_re": [9.544324616924657, 13.253901916323692, 
26.227858234194798, 8.97191296620025, 36.58747506647259, 6.472444128074447, 
33.85168632496112, 26.42290260704406, 21.951281547413, 40.43283890144135], 
"eval_len": [22, 39, 47, 27, 51, 25, 50, 48, 38, 48]}

  7%|▋         | 69997/1000000 [56:30<10:13:16, 25.27it/s]global step 70000, trans_decision ep_re 75.67913182478546

{"global_step": 70000, "eval_re": [107.88679831568878, 17.093058710738035, 
247.72776260903635, 13.411430637290508, 20.30546087520359, 113.53945984530726, 
83.13517205180705, 11.412814597413048, 88.67731662550499, 53.602043979864874], 
"eval_len": [116, 26, 201, 27, 33, 105, 110, 23, 86, 77]}

  8%|▊         | 79999/1000000 [1:05:10<10:13:45, 24.98it/s]global step 80000, trans_decision ep_re 88.63544762436588

{"global_step": 80000, "eval_re": [12.85315946013647, 14.151126732902483, 
14.703466629249577, 20.237635384357084, 236.09828209732657, 84.02052271604288, 
5.607010483263875, 72.84169193025566, 9.783375979712279, 416.058204830412], 
"eval_len": [23, 31, 26, 29, 134, 138, 18, 69, 24, 227]}

  9%|▉         | 89997/1000000 [1:14:00<10:17:22, 24.57it/s]global step 90000, trans_decision ep_re 29.997750022963306

{"global_step": 90000, "eval_re": [58.19226521665807, 11.235507845270837, 
55.850879644671636, 14.533720960459906, 12.032064621806152, 18.41536489283288, 
8.774281124495923, 81.13748018025028, 16.61591439281933, 23.19002135036799], 
"eval_len": [89, 27, 65, 25, 25, 38, 28, 73, 28, 37]}

 10%|▉         | 99998/1000000 [1:22:50<9:51:37, 25.35it/s]global step 100000, trans_decision ep_re 93.1411296380351

{"global_step": 100000, "eval_re": [176.97741428875167, 13.539613682034098, 
227.20736719660937, 19.64621362139577, 10.484307322727796, 119.91100314011581, 
166.97666727464593, 157.91301073688942, 21.707512919681804, 17.04818619749929], 
"eval_len": [130, 22, 180, 33, 22, 134, 107, 112, 39, 30]}

 11%|█         | 109998/1000000 [1:31:24<9:47:54, 25.23it/s]global step 110000, trans_decision ep_re 50.13063770472608

{"global_step": 110000, "eval_re": [10.074088193582364, 14.457110574760796, 
8.552913403675676, 8.384697895368197, 235.8273608513765, 15.523046998445862, 
182.57964678356515, 10.950387357481322, 9.797083614698192, 5.160041374306695], 
"eval_len": [21, 27, 28, 22, 116, 27, 141, 20, 29, 16]}

 12%|█▏        | 119998/1000000 [1:40:13<9:41:28, 25.22it/s]global step 120000, trans_decision ep_re 13.810941900140017

{"global_step": 120000, "eval_re": [15.264596809938716, 16.785436166313225, 
12.232616783102053, 6.505159197365236, 5.678535068112507, 15.155635697100072, 
19.561247980376137, 19.00748662639637, 14.61899315988897, 13.299711512806882], 
"eval_len": [31, 31, 27, 16, 17, 30, 31, 29, 31, 27]}

 13%|█▎        | 129999/1000000 [1:49:02<9:43:57, 24.83it/s]global step 130000, trans_decision ep_re 11.673359499187544

{"global_step": 130000, "eval_re": [14.054959873625993, 8.89365444317926, 
19.32141382454672, 11.485268529622408, 10.156546696512173, 7.70146402813057, 
14.204785120238096, 14.748669950279668, 6.52262044189357, 9.644212083846998], 
"eval_len": [24, 23, 30, 27, 20, 27, 25, 25, 19, 26]}

 14%|█▍        | 139998/1000000 [1:57:53<9:31:06, 25.10it/s]global step 140000, trans_decision ep_re 145.09412476418504

{"global_step": 140000, "eval_re": [4.519389987817897, 522.6323201582147, 
8.55367182464662, 8.657808117479123, 9.32413242524848, 73.55068987464398, 
14.851794828142477, 17.97430521852036, 434.4626785370007, 356.41445667013585], 
"eval_len": [16, 251, 29, 23, 25, 106, 29, 29, 215, 180]}

 15%|█▍        | 149997/1000000 [2:07:00<9:29:26, 24.88it/s]global step 150000, trans_decision ep_re 199.60639395107535

{"global_step": 150000, "eval_re": [14.499265389761296, 678.4079026082645, 
12.673692649551583, 288.09603165487266, 14.2912218981814, 346.4645533850222, 
8.464586536138503, 600.5212395485021, 14.259858178668035, 18.38558766179134], 
"eval_len": [29, 277, 28, 135, 29, 183, 21, 228, 23, 31]}

 16%|█▌        | 159999/1000000 [2:15:41<9:13:03, 25.31it/s]global step 160000, trans_decision ep_re 69.30070872394147

{"global_step": 160000, "eval_re": [91.43962281241484, 7.727773788009392, 
13.978876605649775, 11.615974515314814, 167.69287569621025, 196.3877663468364, 
15.403307585753124, 7.586676944299616, 3.365460959902802, 177.80875198502366], 
"eval_len": [98, 25, 24, 26, 102, 127, 28, 24, 16, 105]}

 17%|█▋        | 169999/1000000 [2:24:30<9:13:56, 24.97it/s]global step 170000, trans_decision ep_re 270.66638997549165

{"global_step": 170000, "eval_re": [630.868874220376, 618.7529120768314, 
260.4428520521384, 12.238789552109473, 11.825524958642017, 16.090535885990597, 
9.399544052546343, 15.890228404324562, 520.4671167904397, 610.6875217615179], 
"eval_len": [228, 276, 126, 27, 22, 39, 21, 25, 205, 234]}

 18%|█▊        | 179999/1000000 [2:33:21<9:09:39, 24.86it/s]global step 180000, trans_decision ep_re 85.61359895515554

{"global_step": 180000, "eval_re": [11.85617496696129, 7.898903105956346, 
14.6727366095922, 138.72298377771955, 3.950233742094332, 10.723406976504757, 
34.784339467406056, 151.61139756750688, 470.5824244988695, 11.333388838944572], 
"eval_len": [33, 25, 27, 93, 18, 23, 44, 96, 181, 26]}

 19%|█▉        | 189998/1000000 [2:42:14<8:59:08, 25.04it/s]global step 190000, trans_decision ep_re 61.40471864590735

{"global_step": 190000, "eval_re": [12.402860691183406, 8.685658999255946, 
166.9043406580646, 16.525155977324772, 174.395065115479, 35.207075297694935, 
139.79671464979182, 36.39927966055877, 10.292085602394433, 13.438949807325768], 
"eval_len": [25, 22, 102, 30, 99, 46, 86, 48, 24, 22]}

 20%|█▉        | 199998/1000000 [2:51:02<8:43:41, 25.46it/s]global step 200000, trans_decision ep_re 110.52831211556418

{"global_step": 200000, "eval_re": [17.615806439501192, 178.79157196535976, 
792.94577538379, 11.903024581111058, 15.27817049119374, 15.536120374706739, 
11.142337909339446, 16.293797659938928, 16.627263337420086, 29.149253013280667],
"eval_len": [29, 126, 292, 24, 30, 28, 23, 27, 28, 43]}

 21%|██        | 209999/1000000 [2:59:50<8:38:08, 25.41it/s]global step 210000, trans_decision ep_re 284.42043145870605

{"global_step": 210000, "eval_re": [948.4939137254609, 154.08517860544248, 
952.6031015680378, 17.809599791251983, 19.088468176794205, 8.628969755046011, 
10.397345497195497, 707.9661002875393, 15.91398865344153, 9.217648526850402], 
"eval_len": [479, 106, 317, 30, 37, 25, 30, 266, 28, 27]}

 22%|██▏       | 219998/1000000 [3:08:50<8:26:23, 25.67it/s]global step 220000, trans_decision ep_re 397.69223322682575

{"global_step": 220000, "eval_re": [654.1159644616927, 46.05394278584149, 
1165.1790759863616, 1347.3303356359716, 176.70572195176652, 6.016658345187742, 
531.9073130199197, 12.196475139702263, 18.700712518859675, 18.716132422954164], 
"eval_len": [262, 47, 443, 450, 96, 25, 202, 24, 31, 40]}

 23%|██▎       | 229998/1000000 [3:17:40<8:30:39, 25.13it/s]global step 230000, trans_decision ep_re 306.3723608123478

{"global_step": 230000, "eval_re": [10.77417001260668, 688.1769579200331, 
11.25245371997537, 901.4677384562169, 6.057836313475547, 14.826485514064135, 
6.026517519461611, 13.443300805350477, 680.67982789505, 731.0183199672438], 
"eval_len": [22, 277, 27, 317, 20, 27, 18, 28, 260, 268]}

 24%|██▍       | 239997/1000000 [3:26:30<8:21:35, 25.25it/s]global step 240000, trans_decision ep_re 101.63300532734874

{"global_step": 240000, "eval_re": [153.24678999990275, 16.164061089404363, 
17.489011916395963, 10.438071928323838, 13.03176212421234, 649.0733649613799, 
4.16233408785127, 12.084154279378257, 125.2704576688412, 15.37004521779763], 
"eval_len": [87, 27, 29, 28, 31, 254, 19, 32, 81, 29]}

 25%|██▍       | 249999/1000000 [3:35:00<8:13:11, 25.35it/s]global step 250000, trans_decision ep_re 270.8801294255964

{"global_step": 250000, "eval_re": [306.41132326994904, 295.54842367976096, 
5.284740087618464, 1040.6165183185346, 16.251084276190824, 10.74297490098576, 
836.037610509455, 172.23992284230525, 8.06768077006442, 17.601015601099597], 
"eval_len": [165, 188, 19, 406, 28, 36, 305, 95, 23, 27]}

 26%|██▌       | 259998/1000000 [3:43:41<7:53:12, 26.06it/s]global step 260000, trans_decision ep_re 222.45125584236453

{"global_step": 260000, "eval_re": [155.168934752536, 6.584397729178581, 
352.3289590719255, 165.7372544172164, 809.0191756892397, 138.25166900106788, 
13.375013920782504, 187.00712763741697, 387.3068686071332, 9.73315759714831], 
"eval_len": [98, 25, 161, 106, 297, 104, 29, 131, 164, 21]}

 27%|██▋       | 269999/1000000 [3:52:21<7:56:10, 25.55it/s]global step 270000, trans_decision ep_re 466.7894828197548

{"global_step": 270000, "eval_re": [1952.3302932670124, 726.15396180529, 
10.384643200057253, 121.57236383517234, 9.961140301695703, 279.60618261455284, 
584.6162246766081, 574.0819857723543, 27.862023937348177, 381.32600878745643], 
"eval_len": [607, 245, 30, 84, 23, 127, 227, 252, 46, 203]}

 28%|██▊       | 279999/1000000 [4:01:20<7:53:31, 25.34it/s]global step 280000, trans_decision ep_re 135.625506723947

{"global_step": 280000, "eval_re": [17.11209162942845, 16.06781964818539, 
514.3316997128685, 8.021802713622023, 200.77157376528226, 16.178390515053696, 
30.45116906263511, 136.84504793212685, 12.098757892574822, 404.3767143676931], 
"eval_len": [28, 29, 204, 19, 138, 27, 43, 105, 29, 207]}

 29%|██▉       | 289997/1000000 [4:09:54<7:51:56, 25.07it/s]global step 290000, trans_decision ep_re 108.45285368508603

{"global_step": 290000, "eval_re": [6.663975155809132, 334.62055417994435, 
12.20259392724381, 33.476439990811144, 9.74600620345537, 14.220285991089778, 
8.213718816175842, 345.9894793572329, 14.46563983035799, 304.92984339874005], 
"eval_len": [18, 145, 27, 44, 30, 26, 18, 170, 25, 151]}

 30%|██▉       | 299999/1000000 [4:18:50<7:33:25, 25.73it/s]global step 300000, trans_decision ep_re 208.1269075428908

{"global_step": 300000, "eval_re": [220.28498982463722, 264.185551139026, 
11.541071326999143, 15.175508145292858, 31.695887299761257, 7.309156419486063, 
745.2783839831045, 8.70558205335136, 762.5480093768125, 14.544935860437127], 
"eval_len": [126, 152, 27, 29, 45, 18, 254, 29, 281, 27]}

 31%|███       | 309999/1000000 [4:27:20<7:31:10, 25.49it/s]global step 310000, trans_decision ep_re 125.12986838288282

{"global_step": 310000, "eval_re": [304.61575621934986, 10.792006612293509, 
748.1721928051076, 44.12864895944051, 11.981934535800727, 40.21079874242841, 
25.086483756955662, 42.12696093875108, 12.232662281494774, 11.951238977205938], 
"eval_len": [170, 26, 402, 45, 27, 47, 43, 46, 29, 30]}

 32%|███▏      | 319997/1000000 [4:36:20<7:24:49, 25.48it/s]global step 320000, trans_decision ep_re 255.66065516461217

{"global_step": 320000, "eval_re": [675.8940318322549, 17.722546526104008, 
14.594251339351658, 991.6085431039437, 15.300169206931619, 475.37313781246525, 
318.4833321736042, 15.359258263845598, 18.81913411420581, 13.45214727341486], 
"eval_len": [314, 30, 29, 333, 30, 218, 174, 27, 28, 27]}

 33%|███▎      | 329999/1000000 [4:44:51<7:26:00, 25.04it/s]global step 330000, trans_decision ep_re 282.64722366915703

{"global_step": 330000, "eval_re": [18.112520303397876, 14.400587942506279, 
350.9568207311449, 712.1534625604284, 11.965648007211154, 311.0848285170047, 
279.438631064931, 10.784898910198798, 8.047398772236164, 1109.5274398825109], 
"eval_len": [28, 24, 171, 276, 22, 155, 152, 26, 24, 410]}

 34%|███▍      | 339999/1000000 [4:53:50<7:22:20, 24.87it/s]global step 340000, trans_decision ep_re 312.9615583634928

{"global_step": 340000, "eval_re": [486.7125819918026, 421.14604497692545, 
12.593671957469363, 456.2361366303332, 1116.7976775895336, 594.3102544888104, 
10.327285921634578, 16.42170411628619, 8.585074670616898, 6.485151291516165], 
"eval_len": [227, 181, 22, 175, 358, 221, 20, 27, 18, 29]}

 35%|███▍      | 349997/1000000 [5:02:40<7:05:12, 25.48it/s]global step 350000, trans_decision ep_re 372.30669723276253

{"global_step": 350000, "eval_re": [217.30515780048617, 625.3978932876346, 
173.92757847169898, 11.193366917324003, 791.2256081435471, 776.3882633516558, 
35.67254046014772, 690.6740770517997, 392.1051700828361, 9.177316760495131], 
"eval_len": [127, 229, 97, 23, 271, 248, 43, 266, 161, 20]}

 36%|███▌      | 359999/1000000 [5:11:30<7:01:52, 25.28it/s]global step 360000, trans_decision ep_re 442.95995629193504

{"global_step": 360000, "eval_re": [11.248374022269795, 711.0035036102885, 
767.4942506624106, 12.416336096580313, 540.3036536154897, 806.352836579299, 
14.542838648673762, 422.63652187046165, 9.125024380570153, 1134.476223433307], 
"eval_len": [23, 282, 252, 28, 202, 334, 26, 184, 24, 358]}

 37%|███▋      | 369999/1000000 [5:20:02<7:00:45, 24.96it/s]global step 370000, trans_decision ep_re 95.94810313362237

{"global_step": 370000, "eval_re": [10.80693984470232, 4.512250532159854, 
8.474989072465206, 731.0147816779464, 9.32482079968317, 12.460064846800044, 
10.254630171523576, 7.915529147508569, 148.25699888811323, 16.460026355321432], 
"eval_len": [23, 18, 22, 281, 25, 24, 30, 28, 107, 26]}

 38%|███▊      | 379999/1000000 [5:29:00<6:51:01, 25.14it/s]global step 380000, trans_decision ep_re 183.70328228373037

{"global_step": 380000, "eval_re": [26.992586806800023, 7.285849802674226, 
8.011965405276639, 43.590345520859664, 758.2567092007192, 8.857282075174274, 
330.51653283562194, 16.03805408076087, 8.43402420586954, 629.0494729035473], 
"eval_len": [43, 21, 20, 43, 269, 21, 167, 27, 22, 235]}

 39%|███▉      | 389998/1000000 [5:37:50<6:41:17, 25.33it/s]global step 390000, trans_decision ep_re 197.09079169251464

{"global_step": 390000, "eval_re": [11.919022769281211, 16.879728576198655, 
40.5646171619277, 317.28410102694795, 417.5774283643475, 9.177512761324849, 
226.1805964224021, 529.1231895594915, 17.158935802902537, 385.0427844803224], 
"eval_len": [24, 24, 43, 148, 191, 29, 128, 263, 27, 194]}

 40%|███▉      | 399998/1000000 [5:46:30<6:27:14, 25.82it/s]global step 400000, trans_decision ep_re 409.34439501819384

{"global_step": 400000, "eval_re": [13.257060641336244, 274.5427743967751, 
5.421630671722119, 691.609764186658, 5.079622958517842, 1032.8682370137667, 
693.4435579557506, 142.23837856476078, 15.967423858699881, 1219.015499933951], 
"eval_len": [24, 136, 17, 233, 17, 311, 278, 103, 26, 384]}

 41%|████      | 409998/1000000 [5:55:20<6:26:23, 25.45it/s]global step 410000, trans_decision ep_re 141.50087335297127

{"global_step": 410000, "eval_re": [47.07561170371986, 11.563931222446685, 
13.920568833419834, 1258.602397475916, 14.784197336140707, 19.449002023698075, 
13.933447283299701, 13.269964699290313, 9.886205529469379, 12.523407422312381], 
"eval_len": [46, 26, 24, 357, 27, 27, 27, 27, 21, 24]}

 42%|████▏     | 419998/1000000 [6:04:10<6:17:50, 25.58it/s]global step 420000, trans_decision ep_re 310.34899044921065

{"global_step": 420000, "eval_re": [811.8783297546178, 153.53573290293377, 
1367.7432073987939, 580.0105451975948, 8.580210240989881, 12.172287172402068, 
7.421804835868161, 8.710662295384019, 139.2636611704554, 14.173463523067188], 
"eval_len": [245, 103, 425, 214, 20, 21, 19, 23, 102, 37]}

 43%|████▎     | 429998/1000000 [6:12:50<6:02:37, 26.20it/s]global step 430000, trans_decision ep_re 199.19585115141496

{"global_step": 430000, "eval_re": [30.392662007335648, 8.775800595322625, 
981.2825757439596, 796.6660830021256, 107.40676266485505, 13.978852937965959, 
16.969904565305473, 11.014360496225557, 8.353583790415078, 17.117925710639238], 
"eval_len": [45, 23, 313, 266, 87, 28, 29, 21, 24, 28]}

 44%|████▍     | 439998/1000000 [6:21:30<6:04:40, 25.59it/s]global step 440000, trans_decision ep_re 242.2799872568606

{"global_step": 440000, "eval_re": [213.4535438699712, 2004.3970662214945, 
11.1141612540604, 8.888099313750514, 115.25207399510253, 30.75030517153302, 
12.063497573349144, 9.705935739747016, 8.195736113350616, 8.979453316247762], 
"eval_len": [168, 542, 22, 20, 134, 42, 22, 23, 18, 19]}

 45%|████▍     | 449999/1000000 [6:30:10<5:57:21, 25.65it/s]global step 450000, trans_decision ep_re 296.207455232333

{"global_step": 450000, "eval_re": [296.75194932941696, 67.29233223587038, 
8.393714763869893, 1160.2887065496996, 9.902521327939969, 95.50003312248478, 
5.991134963690598, 24.06262060704927, 1287.4016086161562, 6.489930807153021], 
"eval_len": [198, 155, 22, 357, 23, 117, 16, 45, 441, 17]}

 46%|████▌     | 459999/1000000 [6:38:42<6:00:16, 24.98it/s]global step 460000, trans_decision ep_re 251.56768292429993

{"global_step": 460000, "eval_re": [182.47740967061617, 12.766847694743456, 
17.286944605224196, 1338.0267264259294, 250.22476570885414, 535.1385712785669, 
151.97286449467074, 5.985554178064469, 9.158726436759906, 12.638418749570251], 
"eval_len": [153, 24, 28, 416, 152, 238, 93, 24, 24, 26]}

 47%|████▋     | 469999/1000000 [6:47:40<5:48:34, 25.34it/s]global step 470000, trans_decision ep_re 340.11063279441476

{"global_step": 470000, "eval_re": [14.368032802742059, 57.681297486527946, 
481.5485828601897, 7.701244585419302, 741.0603790413558, 1388.2439236396876, 
17.32860763706451, 674.1658005541026, 6.512951785014073, 12.495507552043911], 
"eval_len": [24, 77, 231, 23, 253, 462, 26, 276, 20, 24]}

 48%|████▊     | 479999/1000000 [6:56:20<5:37:07, 25.71it/s]global step 480000, trans_decision ep_re 328.9152293460576

{"global_step": 480000, "eval_re": [33.438545720242885, 9.195165407863973, 
872.5872786356821, 8.767178623238639, 38.565230687071804, 1102.7107665343538, 
1146.244523095398, 37.464244542482255, 29.903879796356286, 10.27548041788637], 
"eval_len": [44, 20, 329, 30, 46, 312, 336, 43, 43, 26]}

 49%|████▉     | 489999/1000000 [7:04:51<5:33:32, 25.48it/s]global step 490000, trans_decision ep_re 17.17544983417141

{"global_step": 490000, "eval_re": [11.502153279125062, 10.237813754425343, 
16.611639473106738, 39.58146815235705, 15.442712474024773, 6.347342909116313, 
8.430847493568201, 10.481896510106495, 40.08739324356002, 13.031231052324134], 
"eval_len": [22, 21, 40, 43, 26, 20, 22, 22, 45, 27]}

 50%|████▉     | 499999/1000000 [7:13:40<5:25:36, 25.59it/s]global step 500000, trans_decision ep_re 270.3758419740453

{"global_step": 500000, "eval_re": [160.20631772492706, 662.599623653215, 
22.867151100703715, 10.20181093641285, 27.150228797929806, 798.0195889451614, 
281.0422532711109, 190.08073020480634, 403.93840706566453, 147.6523080405211], 
"eval_len": [123, 235, 41, 25, 44, 285, 138, 119, 212, 144]}

 51%|█████     | 509999/1000000 [7:22:10<5:18:05, 25.67it/s]global step 510000, trans_decision ep_re 202.76114427339255

{"global_step": 510000, "eval_re": [216.6368601019187, 7.498628704087638, 
10.922907842783792, 11.05109179646546, 208.38423819868936, 19.43781129346835, 
909.726400552108, 452.4451068612699, 5.19664126028915, 186.31175612284505], 
"eval_len": [137, 21, 27, 25, 143, 43, 291, 209, 18, 101]}

 52%|█████▏    | 519997/1000000 [7:31:00<5:08:16, 25.95it/s]global step 520000, trans_decision ep_re 285.8292834064381

{"global_step": 520000, "eval_re": [239.79613974699635, 561.9765916332475, 
13.10813931010736, 21.60524705086598, 167.41931573461667, 572.3983164142493, 
626.9777850169353, 4.25362507626288, 647.4226725693687, 3.3350015117312344], 
"eval_len": [146, 257, 28, 41, 97, 224, 234, 23, 530, 17]}

 53%|█████▎    | 529998/1000000 [7:39:21<4:59:58, 26.11it/s]global step 530000, trans_decision ep_re 200.5184657393943

{"global_step": 530000, "eval_re": [28.738298975763175, 10.274400473149788, 
1345.6842467243519, 429.1456116950754, 9.972048540951773, 6.847419654019301, 
11.212492107468593, 117.29182553435464, 11.113947428873097, 34.90436625993521], 
"eval_len": [45, 23, 524, 196, 24, 19, 23, 81, 29, 46]}

 54%|█████▍    | 539999/1000000 [7:47:54<4:56:04, 25.89it/s]global step 540000, trans_decision ep_re 177.99268560743945

{"global_step": 540000, "eval_re": [8.066486950659552, 209.72009325715456, 
461.81254171919187, 8.209620794538367, 418.51985213274673, 9.31262754354076, 
9.09104518603463, 14.330724333598704, 629.6273135887825, 11.236550568146848], 
"eval_len": [28, 175, 196, 21, 219, 26, 20, 29, 256, 27]}

 55%|█████▍    | 549998/1000000 [7:56:40<4:46:01, 26.22it/s]global step 550000, trans_decision ep_re 62.50637075090653

{"global_step": 550000, "eval_re": [335.89166309413605, 33.492120102864725, 
29.50650667681808, 9.5535417713402, 13.527481184521509, 8.948665956633214, 
10.39864260596956, 167.76873111721738, 10.062260242872487, 5.914094756691907], 
"eval_len": [163, 45, 44, 27, 27, 26, 22, 103, 20, 19]}

 56%|█████▌    | 559999/1000000 [8:05:10<4:42:20, 25.97it/s]global step 560000, trans_decision ep_re 153.40017646938458

{"global_step": 560000, "eval_re": [361.63278432465256, 17.183560060976145, 
31.619786558317784, 20.758285872904967, 12.028053299441572, 14.426029699247245, 
9.97388684222309, 15.51772653089078, 17.059094212589006, 1033.8025572926026], 
"eval_len": [184, 30, 43, 34, 29, 27, 28, 26, 28, 322]}

 57%|█████▋    | 569998/1000000 [8:13:32<4:31:54, 26.36it/s]global step 570000, trans_decision ep_re 254.34846516778907

{"global_step": 570000, "eval_re": [4.535880615760573, 1161.8605102850963, 
842.1167802767278, 7.099445088687133, 12.649142117745686, 463.4330612424516, 
8.514986004565973, 18.13525921963783, 11.35945925078864, 13.780127576429244], 
"eval_len": [23, 361, 258, 28, 29, 195, 20, 28, 23, 24]}

 58%|█████▊    | 579997/1000000 [8:22:20<4:32:26, 25.69it/s]global step 580000, trans_decision ep_re 220.98721095902812

{"global_step": 580000, "eval_re": [1375.3338534556744, 14.767390235486, 
14.439756459663327, 9.101809692458923, 8.27198719273819, 6.799750992705982, 
725.4121538225721, 11.78139544800997, 31.99721725265203, 11.966795038320319], 
"eval_len": [395, 30, 28, 26, 30, 18, 316, 23, 43, 23]}

 59%|█████▉    | 589999/1000000 [8:30:50<4:23:37, 25.92it/s]global step 590000, trans_decision ep_re 41.524457516003686

{"global_step": 590000, "eval_re": [9.642848781031907, 8.095597214992678, 
7.587341028982358, 302.8374411220008, 17.661553341514363, 11.053152803443233, 
17.486075361537992, 16.73377123206191, 10.759274069065176, 13.387520205406412], 
"eval_len": [29, 22, 25, 178, 29, 21, 28, 29, 23, 33]}

 60%|█████▉    | 599999/1000000 [8:39:10<4:17:50, 25.86it/s]global step 600000, trans_decision ep_re 13.053384723886023

{"global_step": 600000, "eval_re": [17.366443705769207, 12.734642206459013, 
9.515528532282453, 7.622595501816215, 10.740233167407828, 15.016069216842194, 
17.982593069623686, 7.490856423703962, 15.04967572129135, 17.01520969366432], 
"eval_len": [31, 24, 23, 18, 20, 27, 33, 18, 25, 28]}

 61%|██████    | 609999/1000000 [8:47:40<4:10:15, 25.97it/s]global step 610000, trans_decision ep_re 385.2975543982773

{"global_step": 610000, "eval_re": [142.50393877207352, 6.008418532500646, 
857.987197048308, 160.51142094679687, 1006.0935114020167, 9.298055538796032, 
1353.1260530882514, 9.867101600886219, 7.418653786512944, 300.16119326663124], 
"eval_len": [96, 25, 293, 97, 302, 21, 401, 26, 27, 173]}

 62%|██████▏   | 619998/1000000 [8:56:30<4:02:30, 26.12it/s]global step 620000, trans_decision ep_re 501.9821754817377

{"global_step": 620000, "eval_re": [424.70297722025094, 647.9606066910591, 
522.3877196415845, 764.802050593927, 128.27177165878453, 33.96026212833245, 
259.3696739513256, 517.7670686408238, 1708.8427182082464, 11.756906083043203], 
"eval_len": [181, 258, 210, 284, 87, 41, 136, 215, 507, 27]}

 63%|██████▎   | 629999/1000000 [9:04:51<3:55:51, 26.15it/s]global step 630000, trans_decision ep_re 36.22125098381054

{"global_step": 630000, "eval_re": [15.058135358609475, 10.832599583790294, 
15.887227554001532, 12.232341710517902, 8.233089874890931, 9.397349872011214, 
32.32733511429924, 12.708077083400909, 239.33860636659753, 6.197747319986385], 
"eval_len": [28, 40, 26, 24, 19, 18, 80, 27, 235, 19]}

 64%|██████▍   | 639998/1000000 [9:13:22<3:46:51, 26.45it/s]global step 640000, trans_decision ep_re 195.78949608830456

{"global_step": 640000, "eval_re": [434.6745295127565, 519.2885033998571, 
40.896483361025346, 172.03848250447155, 9.01863154041597, 11.355610155810776, 
11.23967644315664, 595.0000869417981, 148.58017644899724, 15.80278057475663], 
"eval_len": [165, 212, 89, 119, 23, 23, 24, 231, 97, 26]}

 65%|██████▍   | 649997/1000000 [9:22:10<3:44:27, 25.99it/s]global step 650000, trans_decision ep_re 229.34358571481522

{"global_step": 650000, "eval_re": [12.001556697131338, 573.9029196502089, 
8.558220748293701, 7.632325491150981, 9.422466659306373, 10.555787677104647, 
1135.4408646268957, 14.173173025911796, 17.684983777703696, 504.06355879444453],
"eval_len": [28, 194, 23, 19, 20, 31, 326, 29, 31, 187]}

 66%|██████▌   | 659999/1000000 [9:30:40<3:39:01, 25.87it/s]global step 660000, trans_decision ep_re 239.76215313686276

{"global_step": 660000, "eval_re": [694.5483039771334, 12.643759984173808, 
2.2727348271303343, 14.383106461843974, 522.260919700478, 116.26091557575987, 
9.77349598220282, 6.851000763078654, 9.898590447967477, 1008.7287036488589], 
"eval_len": [245, 24, 13, 27, 205, 147, 21, 19, 25, 355]}

 67%|██████▋   | 669997/1000000 [9:39:03<3:33:49, 25.72it/s]global step 670000, trans_decision ep_re 380.00891604445144

{"global_step": 670000, "eval_re": [13.714882342318836, 6.510512103222621, 
521.3936781592752, 20.954726868699073, 8.45393083233798, 14.61620472160417, 
12.146266812623036, 2948.161943118836, 14.080653208241046, 240.05636227735627], 
"eval_len": [27, 19, 209, 30, 28, 28, 30, 753, 26, 147]}

 68%|██████▊   | 679999/1000000 [9:47:50<3:28:18, 25.60it/s]global step 680000, trans_decision ep_re 258.2808966835522

{"global_step": 680000, "eval_re": [1753.3878299839266, 65.64876540945244, 
21.673915504391616, 10.193650044975463, 27.292622367571287, 4.74694780604363, 
379.4696295406131, 290.14600274154213, 13.6721333774532, 16.57747005955296], 
"eval_len": [529, 87, 39, 21, 42, 17, 153, 183, 38, 28]}

 69%|██████▉   | 689999/1000000 [9:56:30<3:19:15, 25.93it/s]global step 690000, trans_decision ep_re 374.00237775131274

{"global_step": 690000, "eval_re": [1218.1877272281176, 208.95674194464303, 
148.60839398113134, 702.1197434115265, 244.9682161743439, 6.096481266054978, 
581.8776079108761, 17.060638804169812, 593.635420882729, 18.512805909534958], 
"eval_len": [351, 126, 128, 308, 138, 21, 217, 32, 233, 30]}

 70%|██████▉   | 699999/1000000 [10:05:00<3:11:28, 26.11it/s]global step 700000, trans_decision ep_re 436.32488717849355

{"global_step": 700000, "eval_re": [1560.5734749616288, 33.6374954910149, 
1115.1125300812507, 130.36471536416389, 7.3261459240030185, 872.2784865049555, 
13.518793250818828, 8.19006616679344, 147.08362089617611, 475.1635431441303], 
"eval_len": [508, 44, 356, 129, 29, 361, 29, 22, 129, 207]}

 71%|███████   | 709998/1000000 [10:13:40<3:04:15, 26.23it/s]global step 710000, trans_decision ep_re 357.24610294543277

{"global_step": 710000, "eval_re": [711.8982987057113, 272.6495971771977, 
863.1702675358649, 949.5943602599391, 345.8970676480877, 166.46726236445446, 
6.248692719096002, 9.417301321767038, 235.2191231625755, 11.899058559634275], 
"eval_len": [266, 135, 297, 344, 247, 111, 28, 19, 151, 25]}

 72%|███████▏  | 719999/1000000 [10:22:01<2:59:51, 25.95it/s]global step 720000, trans_decision ep_re 378.0515172796135

{"global_step": 720000, "eval_re": [923.9105691157532, 10.672057625964252, 
787.0834358029005, 193.1308229466619, 669.0472324519114, 155.9554191650012, 
517.0732704784648, 14.400602083430735, 495.39578370898414, 13.845979417063058], 
"eval_len": [312, 26, 260, 143, 274, 110, 207, 27, 190, 25]}

 73%|███████▎  | 729999/1000000 [10:30:50<2:52:22, 26.11it/s]global step 730000, trans_decision ep_re 164.1570705076729

{"global_step": 730000, "eval_re": [12.106399481894147, 27.42725015895488, 
24.369300764343066, 73.78428155118529, 10.93135497436017, 1302.9371060555839, 
132.52889663432683, 8.466971212522377, 24.19937934944401, 24.81976489411437], 
"eval_len": [23, 41, 41, 172, 26, 370, 122, 18, 40, 40]}

 74%|███████▍  | 739999/1000000 [10:39:20<2:46:06, 26.09it/s]global step 740000, trans_decision ep_re 103.8822020554868

{"global_step": 740000, "eval_re": [14.932627390730072, 4.826771622492483, 
508.56350341353, 15.672170991220918, 122.22912234504686, 5.530244232933922, 
102.54581290953534, 246.19673212385786, 6.186306507270097, 12.138729018250297], 
"eval_len": [29, 16, 201, 29, 95, 28, 80, 132, 25, 22]}

 75%|███████▍  | 749999/1000000 [10:47:41<2:39:42, 26.09it/s]global step 750000, trans_decision ep_re 537.2103734544302

{"global_step": 750000, "eval_re": [14.19856955274497, 169.24883336578458, 
223.0611355764199, 14.59266184019832, 14.51048661428788, 1235.3919775563677, 
2291.8502018324316, 220.10318399691212, 169.96104398506915, 1019.1856402240854],
"eval_len": [25, 100, 136, 29, 27, 366, 649, 130, 87, 339]}

 76%|███████▌  | 759999/1000000 [10:56:30<2:34:18, 25.92it/s]global step 760000, trans_decision ep_re 386.8999681501085

{"global_step": 760000, "eval_re": [361.54509060517205, 12.39148975010062, 
162.15207609342914, 9.87384227376697, 14.438842679273943, 1071.2125343773482, 
9.645196806101206, 1821.1828781092104, 124.78268642524009, 281.7750443814431], 
"eval_len": [211, 23, 106, 24, 26, 325, 30, 460, 108, 148]}

 77%|███████▋  | 769999/1000000 [11:05:00<2:25:34, 26.33it/s]global step 770000, trans_decision ep_re 401.987549713148

{"global_step": 770000, "eval_re": [10.745479815011302, 1699.8798263592716, 
10.949750123699319, 1569.8848644140237, 340.446418627674, 19.020889383835417, 
324.0054470541455, 18.142800071427317, 15.782262783231271, 11.017758499160063], 
"eval_len": [30, 556, 22, 441, 131, 30, 217, 27, 29, 20]}

 78%|███████▊  | 779999/1000000 [11:13:21<2:20:02, 26.18it/s]global step 780000, trans_decision ep_re 443.87453461236385

{"global_step": 780000, "eval_re": [28.25083669829775, 6.468517783392743, 
893.3940898869622, 11.440742019678376, 17.788002575051323, 12.85671904978909, 
1703.3347143424212, 171.6058864648655, 131.84146404696423, 1461.764373256216], 
"eval_len": [45, 19, 349, 21, 29, 27, 449, 108, 140, 393]}

 79%|███████▉  | 789999/1000000 [11:21:52<2:14:08, 26.09it/s]global step 790000, trans_decision ep_re 139.400000303502

{"global_step": 790000, "eval_re": [31.798112353753464, 470.97930760188063, 
11.432477939853065, 20.180391509728118, 105.38147843463021, 152.268244335803, 
14.672175688802778, 11.943915422729553, 562.7158046580416, 12.628095089797347], 
"eval_len": [42, 234, 24, 27, 95, 115, 25, 25, 227, 26]}

 80%|███████▉  | 799998/1000000 [11:30:21<2:06:07, 26.43it/s]global step 800000, trans_decision ep_re 338.40112348723994

{"global_step": 800000, "eval_re": [12.962240369370228, 225.6348103570334, 
8.433375138438873, 306.59846659057666, 184.29998324329523, 973.2989637262779, 
156.72858487496114, 274.35625286818976, 241.99933717878275, 999.6992205254736], 
"eval_len": [22, 130, 21, 147, 110, 308, 128, 145, 193, 306]}

 81%|████████  | 809999/1000000 [11:38:53<2:01:19, 26.10it/s]global step 810000, trans_decision ep_re 170.1471805574973

{"global_step": 810000, "eval_re": [6.713219846651698, 921.7522892923744, 
13.637575369984559, 465.0428386685769, 38.7985427026973, 36.80307707975715, 
14.657842522229407, 8.829819166336039, 162.64429536466244, 32.592305561703476], 
"eval_len": [19, 287, 23, 221, 45, 43, 27, 24, 92, 43]}

 82%|████████▏ | 819998/1000000 [11:47:21<1:53:19, 26.47it/s]global step 820000, trans_decision ep_re 407.38869102102376

{"global_step": 820000, "eval_re": [61.601297323211625, 475.25230029447926, 
652.457827783362, 453.4018480313782, 179.6037349869917, 11.951000815146573, 
1417.990366255124, 747.9071249857931, 62.053279259373525, 11.668130475377923], 
"eval_len": [66, 185, 289, 189, 118, 28, 383, 252, 93, 29]}

 83%|████████▎ | 829998/1000000 [11:55:53<1:46:44, 26.54it/s]global step 830000, trans_decision ep_re 274.13247231311686

{"global_step": 830000, "eval_re": [328.20298187759204, 18.676495846328518, 
147.85298296914723, 31.432603638621654, 157.24204211542852, 194.40867981905927, 
715.9639300388885, 254.09421425348572, 10.129039960756858, 883.3217526118605], 
"eval_len": [175, 27, 107, 43, 110, 125, 288, 132, 23, 319]}

 84%|████████▍ | 839998/1000000 [12:04:23<1:40:47, 26.46it/s]global step 840000, trans_decision ep_re 177.5791237605127

{"global_step": 840000, "eval_re": [22.91805650363522, 30.720791981163575, 
21.07572899752969, 16.293763089773083, 13.062832497005532, 9.888078838113483, 
415.19319340529125, 21.191443111995056, 1178.0657491830011, 47.38159999761888], 
"eval_len": [42, 42, 144, 26, 76, 23, 176, 42, 336, 84]}

 85%|████████▍ | 849998/1000000 [12:13:10<1:34:39, 26.41it/s]global step 850000, trans_decision ep_re 410.81019485938066

{"global_step": 850000, "eval_re": [11.196001708220786, 35.2397492399352, 
894.488803607969, 846.0824168308407, 6.259665595006434, 661.2365759040871, 
14.526787414362804, 1035.4968495253343, 255.62164963058044, 347.95344913746976],
"eval_len": [22, 44, 281, 243, 16, 297, 30, 298, 130, 188]}

 86%|████████▌ | 859999/1000000 [12:21:40<1:29:14, 26.15it/s]global step 860000, trans_decision ep_re 309.27707615586127

{"global_step": 860000, "eval_re": [31.6564977755528, 18.930540859346433, 
6.948008604248772, 1815.424970271929, 1147.6031877435007, 20.387013063452816, 
24.780051870535587, 12.474585609950566, 9.076253441956785, 5.489652318139649], 
"eval_len": [42, 41, 19, 473, 343, 29, 44, 28, 19, 18]}

 87%|████████▋ | 869999/1000000 [12:29:54<1:22:39, 26.21it/s]global step 870000, trans_decision ep_re 108.5656401954396

{"global_step": 870000, "eval_re": [13.134956197606737, 452.7770081600362, 
39.445389905335404, 96.71163250038997, 13.775347841939176, 12.392358731802911, 
13.823947908727988, 145.8325364047001, 287.42772060419276, 10.335503699664828], 
"eval_len": [24, 155, 43, 143, 29, 27, 27, 94, 166, 29]}

 88%|████████▊ | 879998/1000000 [12:38:23<1:14:38, 26.79it/s]global step 880000, trans_decision ep_re 323.4085942025056

{"global_step": 880000, "eval_re": [36.19078921078753, 807.761487275971, 
17.408627913914927, 16.244198901799052, 601.049307087863, 15.343099748763258, 
1133.17469096869, 578.3877549969427, 13.168012637335126, 15.357973282989574], 
"eval_len": [43, 273, 28, 26, 251, 25, 379, 225, 23, 29]}

 89%|████████▉ | 889997/1000000 [12:46:54<1:09:56, 26.21it/s]global step 890000, trans_decision ep_re 118.27351184538381

{"global_step": 890000, "eval_re": [354.00799522322217, 17.204925751781442, 
26.52143919967533, 8.150619347258774, 23.243619672433898, 37.53094775218896, 
430.7223913887658, 41.07691206747416, 221.53019191915698, 22.746076131880475], 
"eval_len": [177, 26, 42, 24, 42, 45, 199, 44, 118, 41]}

 90%|████████▉ | 899998/1000000 [12:55:40<1:02:56, 26.48it/s]global step 900000, trans_decision ep_re 436.4750972668577

{"global_step": 900000, "eval_re": [135.0410463163903, 23.28355939815816, 
15.78908390839558, 156.0489674782015, 1436.500529685034, 328.5252627202846, 
23.459183782740656, 27.68471391149746, 2203.5277091574553, 14.89091631041942], 
"eval_len": [133, 43, 29, 114, 394, 177, 41, 41, 560, 31]}

 91%|█████████ | 909999/1000000 [13:04:10<57:18, 26.17it/s]global step 910000, trans_decision ep_re 203.4342682109743

{"global_step": 910000, "eval_re": [443.2418021377983, 4.5579577561863385, 
8.057724570734624, 14.89301867502268, 1055.484016316601, 5.841571462330845, 
168.68225402590735, 155.83977458340817, 172.5110710322506, 5.23349154950322], 
"eval_len": [199, 14, 22, 28, 317, 21, 87, 90, 117, 17]}

 92%|█████████▏| 919997/1000000 [13:12:24<50:47, 26.25it/s]global step 920000, trans_decision ep_re 155.1062862349228

{"global_step": 920000, "eval_re": [13.419478078457683, 10.20579373862161, 
20.17115959707365, 204.77671658672028, 11.070318606250941, 121.46732470927512, 
593.0493118081052, 19.83190111195886, 9.128240840185429, 547.9426172725794], 
"eval_len": [28, 24, 41, 137, 37, 127, 216, 41, 21, 279]}

 93%|█████████▎| 929997/1000000 [13:21:10<44:43, 26.09it/s]global step 930000, trans_decision ep_re 810.8884939515916

{"global_step": 930000, "eval_re": [12.079425997665574, 28.377943415485937, 
1192.1713645013144, 8.163976656149183, 1676.7920689771627, 1342.5617270588718, 
16.01775695466893, 40.48178422983882, 10.08372508339574, 3782.155166641363], 
"eval_len": [24, 41, 455, 25, 399, 471, 30, 46, 20, 883]}

 94%|█████████▍| 939998/1000000 [13:29:40<37:46, 26.47it/s]global step 940000, trans_decision ep_re 385.2624796795064

{"global_step": 940000, "eval_re": [27.286761432271398, 10.2152455651173, 
12.383795335575083, 1694.407034146499, 811.0337442022574, 13.331652897769379, 
20.993305294853666, 11.979170877948, 45.87481076836614, 1205.1192762744067], 
"eval_len": [44, 25, 29, 431, 298, 27, 40, 22, 45, 373]}

 95%|█████████▍| 949998/1000000 [13:38:10<31:36, 26.36it/s]global step 950000, trans_decision ep_re 678.2299911668038

{"global_step": 950000, "eval_re": [513.2595599334063, 260.83622935604353, 
243.8822233518522, 14.933707151339659, 13.346711553506744, 212.33677685988465, 
321.92858918137796, 1338.1960693598808, 174.2739440854002, 3689.3061008353457], 
"eval_len": [227, 169, 139, 28, 29, 127, 223, 381, 122, 841]}

 96%|█████████▌| 959997/1000000 [13:46:32<25:24, 26.24it/s]global step 960000, trans_decision ep_re 105.15459549411287

{"global_step": 960000, "eval_re": [26.693396608906223, 29.987738327518652, 
7.877446509867671, 6.252313573574038, 7.5861632225340605, 7.262600108131534, 
31.17054093316501, 14.214174261697346, 901.3656189122452, 19.13596248348908], 
"eval_len": [41, 42, 22, 16, 26, 19, 41, 26, 286, 41]}

 97%|█████████▋| 969999/1000000 [13:55:00<19:01, 26.29it/s]global step 970000, trans_decision ep_re 168.31944732940434

{"global_step": 970000, "eval_re": [15.31876385022213, 483.27252891218774, 
13.544323163688139, 735.5294783468414, 32.03736834596889, 9.416807556327617, 
15.703459932268304, 140.2522340993021, 11.143183062777252, 226.97632602446], 
"eval_len": [27, 196, 25, 244, 46, 19, 26, 85, 22, 141]}

 98%|█████████▊| 979999/1000000 [14:03:40<12:44, 26.17it/s]global step 980000, trans_decision ep_re 96.70969954178705

{"global_step": 980000, "eval_re": [16.257413519695778, 11.113574283980512, 
13.348770230099538, 282.8987727351693, 9.170678183301401, 32.19807596294793, 
238.8143130211085, 11.232839418736194, 340.8869918030164, 11.175566259814781], 
"eval_len": [28, 25, 28, 147, 21, 42, 129, 25, 226, 27]}

 99%|█████████▉| 989999/1000000 [14:12:10<06:22, 26.13it/s]global step 990000, trans_decision ep_re 293.18772618730407

{"global_step": 990000, "eval_re": [9.723405721775213, 13.834425074729188, 
53.751363819897826, 7.325740153823708, 12.07517024509402, 233.89823228279866, 
1282.0188289694236, 13.300294376363144, 462.7560729778986, 843.1937282512372], 
"eval_len": [22, 25, 213, 17, 25, 174, 370, 25, 201, 269]}

100%|█████████▉| 999999/1000000 [14:20:40<00:00, 26.04it/s]global step 1000000, trans_decision ep_re 147.54292351961269

{"global_step": 1000000, "eval_re": [35.623462310096286, 16.349170595201812, 
8.06163854608901, 14.685335446395413, 15.189034548804901, 24.69650325268234, 
32.203165736162354, 5.73424416331015, 21.16829198473968, 1301.718388612645], 
"eval_len": [45, 25, 18, 30, 26, 41, 43, 17, 41, 399]}

100%|██████████| 1000000/1000000 [14:20:45<00:00, 19.36it/s]
