
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.25
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9998/1000000 [03:22<8:36:45, 31.93it/s]global step 10000, trans_decision ep_re 79.21349062123635

{"global_step": 10000, "eval_re": [1.493922767315123, 14.045678590345032, 
12.582912083348, 5.434028876599597, 0.8041114751961809, 237.85293728475426, 
348.613089444492, 5.675311847778729, 133.37083227769818, 32.26208156483641], 
"eval_len": [22, 85, 23, 17, 15, 142, 399, 16, 71, 121]}

  2%|▏         | 19997/1000000 [10:06<8:27:53, 32.16it/s]global step 20000, trans_decision ep_re 18.63170475660776

{"global_step": 20000, "eval_re": [-1.7429999283408946, -0.08444004110857403, 
0.8714202833597453, 2.6805121076693585, -0.09528486970959238, 
0.7210154454513661, 143.328037845817, 2.4433965094473757, 30.755295066701343, 
7.44009514679048], "eval_len": [17, 10, 11, 18, 13, 18, 174, 18, 44, 78]}

  3%|▎         | 29997/1000000 [16:48<8:26:18, 31.93it/s]global step 30000, trans_decision ep_re 25.446248057915994

{"global_step": 30000, "eval_re": [9.181709504536634, 59.8848062065314, 
-0.21338604800867333, 149.16408004151512, 13.838832708100567, 9.090187711802901,
-1.749614335246192, 10.442177271990976, 0.8838901037101041, 3.9397974142271273],
"eval_len": [34, 100, 11, 90, 30, 20, 15, 40, 26, 14]}

  4%|▍         | 39997/1000000 [23:30<8:16:06, 32.25it/s]global step 40000, trans_decision ep_re 38.44328158214116

{"global_step": 40000, "eval_re": [17.11538770396825, 29.10925755471488, 
50.87141109677726, 99.27411468901286, 1.4821637259090814, 60.888702471151845, 
59.298477583829715, 3.483693336457632, 60.26304516135053, 2.6465624982395526], 
"eval_len": [36, 97, 53, 104, 13, 97, 151, 29, 88, 14]}

  5%|▍         | 49999/1000000 [30:16<8:18:02, 31.79it/s]global step 50000, trans_decision ep_re 62.445094457131425

{"global_step": 50000, "eval_re": [108.13988757799618, 2.9277695626440536, 
1.1961137609000927, 3.5125744173567095, 10.454188264003427, 
-0.03241745138701024, 108.12948861120027, 124.63176403438756, 4.892504794290744,
260.59907099992216], "eval_len": [89, 13, 11, 13, 33, 12, 133, 170, 14, 177]}

  6%|▌         | 59997/1000000 [37:04<8:10:14, 31.96it/s]global step 60000, trans_decision ep_re 52.5531419766556

{"global_step": 60000, "eval_re": [3.7160275829944798, -0.5109729258636369, 
269.08122577800265, 13.109192899405969, 4.5060978090775095, 204.23942831832457, 
-2.498693708706787, 10.086916370113633, 6.007876132062839, 17.79432151114485], 
"eval_len": [27, 20, 174, 32, 30, 114, 27, 35, 34, 35]}

  7%|▋         | 69997/1000000 [43:52<8:07:37, 31.79it/s]global step 70000, trans_decision ep_re 115.20602583119098

{"global_step": 70000, "eval_re": [366.9466226630024, 31.348975105562054, 
3.1497341520316695, -0.796316206699879, 156.64989223645432, 57.0758922749267, 
487.04024698081395, 16.393525092754693, 4.055170363211196, 30.19651564985298], 
"eval_len": [161, 47, 23, 23, 122, 92, 235, 37, 13, 59]}

  8%|▊         | 79999/1000000 [50:50<7:59:51, 31.95it/s]global step 80000, trans_decision ep_re 143.8054631040926

{"global_step": 80000, "eval_re": [309.0578152180459, 99.29869995541469, 
445.47796214507395, 4.4927673849744005, -0.729289760318174, 9.4462567878048, 
310.616760946454, 7.660015336342073, 1.4355166299993685, 251.29812639713515], 
"eval_len": [150, 146, 189, 16, 16, 19, 197, 18, 10, 132]}

  9%|▉         | 89997/1000000 [57:24<7:41:50, 32.84it/s]global step 90000, trans_decision ep_re 103.20450107919294

{"global_step": 90000, "eval_re": [5.295458410927257, -1.03133096754461, 
2.9075775628645446, 0.5665579610032663, 500.32976557992316, 223.35100726546776, 
293.89728677028216, 3.6310770438725903, 1.5767786919089197, 1.5208324732244145],
"eval_len": [30, 19, 15, 15, 244, 110, 131, 18, 13, 20]}

 10%|▉         | 99999/1000000 [1:04:08<7:53:13, 31.70it/s]global step 100000, trans_decision ep_re 126.1515348075012

{"global_step": 100000, "eval_re": [191.27167264738654, 402.2719326326766, 
3.148051866748077, 5.66218825665183, 9.6212311322166, 348.64060211151366, 
291.2521864262724, 5.263264033055203, 3.9612003133588445, 0.42301865513245107], 
"eval_len": [103, 176, 15, 17, 24, 221, 177, 17, 17, 13]}

 11%|█         | 109997/1000000 [1:10:51<7:32:35, 32.77it/s]global step 110000, trans_decision ep_re 105.94904765983692

{"global_step": 110000, "eval_re": [0.5651238103283112, -0.64808987495329, 
17.382775007470972, -2.6570213737101636, 26.337998510514467, 311.83159884106846,
9.182111764635415, 1.9555795114802175, 693.9342147642178, 1.606185637317234], 
"eval_len": [34, 21, 33, 20, 38, 177, 32, 13, 378, 16]}

 12%|█▏        | 119999/1000000 [1:17:35<7:43:59, 31.61it/s]global step 120000, trans_decision ep_re 146.47280108516648

{"global_step": 120000, "eval_re": [418.40386175457024, 5.168550875703216, 
3.0973788318198414, 1018.2712406981184, 5.569982793379443, 6.995278672030804, 
4.222464609287408, 4.387073233196397, -4.642589969900333, 3.2547693534592734], 
"eval_len": [234, 16, 18, 563, 17, 20, 16, 14, 27, 17]}

 13%|█▎        | 129997/1000000 [1:24:20<7:29:56, 32.23it/s]global step 130000, trans_decision ep_re 184.176057073403

{"global_step": 130000, "eval_re": [3.9711091645035816, 5.806598031501495, 
-1.1674202648032743, 532.1060828531962, 13.409264369328621, 2.214834005284003, 
709.0568719320665, 5.59010426502135, 161.1828798962014, 409.59024648173016], 
"eval_len": [17, 17, 27, 230, 32, 18, 330, 21, 205, 193]}

 14%|█▍        | 139999/1000000 [1:31:05<7:24:35, 32.24it/s]global step 140000, trans_decision ep_re 94.10952221769085

{"global_step": 140000, "eval_re": [83.47920153377002, 2.6475169942830563, 
-0.6704297277114677, 2.1784635023000907, 569.2511190305011, 5.642976593731048, 
274.6550318780849, -1.2201376551437821, -1.1342767092489259, 6.26575673634263], 
"eval_len": [132, 15, 25, 21, 297, 17, 172, 14, 27, 19]}

 15%|█▍        | 149997/1000000 [1:38:00<7:21:26, 32.09it/s]global step 150000, trans_decision ep_re 286.68121397746734

{"global_step": 150000, "eval_re": [6.120467083336173, 450.12110768558887, 
5.0425351446518025, -2.242198859565104, 0.15179444322698954, 869.2766951309412, 
3.1979241021323834, 1526.8348817938513, 3.5066955436490614, 4.80223770686006], 
"eval_len": [18, 231, 17, 21, 26, 395, 17, 649, 15, 17]}

 16%|█▌        | 159997/1000000 [1:44:33<7:07:48, 32.72it/s]global step 160000, trans_decision ep_re 105.51240129645755

{"global_step": 160000, "eval_re": [4.346767329095832, -0.8973615949930718, 
246.4528145637327, 0.24425019620510602, 21.181892314642198, 1.9685819785773642, 
277.82006077702556, 389.96966650502225, 109.68909379750473, 4.348247097762773], 
"eval_len": [15, 10, 140, 16, 36, 15, 136, 178, 91, 18]}

 17%|█▋        | 169999/1000000 [1:51:30<7:11:44, 32.04it/s]global step 170000, trans_decision ep_re 334.48760126315995

{"global_step": 170000, "eval_re": [0.09729038967427348, 9.115967876322983, 
2.334310145987807, 175.23115713745807, 160.8667717140937, -1.7622904654339857, 
279.5051514052153, 2.745871629798229, 4.40454961703615, 2712.3372331814467], 
"eval_len": [13, 21, 19, 113, 108, 14, 161, 17, 15, 1000]}

 18%|█▊        | 179997/1000000 [1:58:20<7:07:13, 31.99it/s]global step 180000, trans_decision ep_re 177.31948860499793

{"global_step": 180000, "eval_re": [5.984145484186601, 3.98116354291295, 
9.207883383959805, 3.3537328144086698, 183.65310444822822, 4.23751867302949, 
1385.607657648773, 158.76819533995496, -2.8927370281897122, 21.29422174271541], 
"eval_len": [19, 19, 18, 21, 107, 18, 566, 129, 13, 39]}

 19%|█▉        | 189997/1000000 [2:04:56<7:01:21, 32.04it/s]global step 190000, trans_decision ep_re 82.06170484167708

{"global_step": 190000, "eval_re": [115.46727836251827, 2.4418914324515897, 
-0.0077120644791827675, -1.4811627918422143, 376.1459167536626, 
10.087379617635708, 5.39420548291318, 122.26429146169005, 1.7798890733052777, 
188.5250710889155], "eval_len": [82, 21, 21, 12, 174, 19, 27, 93, 19, 154]}

 20%|█▉        | 199997/1000000 [2:11:42<6:59:03, 31.82it/s]global step 200000, trans_decision ep_re 222.2361426421021

{"global_step": 200000, "eval_re": [124.27347599519483, 7.1639035728023135, 
385.21904886119796, 27.89067515191979, 78.11372092085904, 615.3041869633298, 
7.006708331687523, 511.05183723631666, 1.3118525105746095, 465.0260168771383], 
"eval_len": [85, 34, 220, 49, 109, 232, 21, 285, 19, 193]}

 21%|██        | 209999/1000000 [2:18:30<6:58:02, 31.50it/s]global step 210000, trans_decision ep_re 164.56408078002016

{"global_step": 210000, "eval_re": [108.5560490469838, 2.943193719747544, 
951.4550857673597, 9.431044642971157, 4.940169923146426, 3.9330687684407772, 
549.9428744461153, 12.140503919059832, 0.3208178664364803, 1.9779996999405165], 
"eval_len": [77, 16, 341, 21, 18, 14, 267, 39, 14, 17]}

 22%|██▏       | 219997/1000000 [2:25:17<6:48:59, 31.79it/s]global step 220000, trans_decision ep_re 42.277092042461604

{"global_step": 220000, "eval_re": [31.212703312960887, 1.616813344118985, 
0.5610795847655179, 4.8261297474666565, 0.7482710661916787, 2.9512633365903698, 
0.18637190968833459, 11.567449630673117, 12.642948120683245, 356.4578903714772],
"eval_len": [67, 31, 23, 18, 13, 12, 16, 32, 24, 164]}

 23%|██▎       | 229997/1000000 [2:32:02<6:46:27, 31.57it/s]global step 230000, trans_decision ep_re 107.05415217189488

{"global_step": 230000, "eval_re": [8.829697331532488, -2.451517204464153, 
3.575305964419587, 22.355242617131832, 1.2844565154372833, 384.3146276523055, 
612.3819294784771, 5.858370776693327, 33.55204885981696, 0.8413597275990513], 
"eval_len": [22, 19, 17, 39, 17, 173, 243, 17, 65, 13]}

 24%|██▍       | 239999/1000000 [2:39:00<6:37:47, 31.84it/s]global step 240000, trans_decision ep_re 157.87353052956763

{"global_step": 240000, "eval_re": [7.668560627048993, 21.797961422477506, 
688.3517553985005, -3.400547035975907, 4.03830920783274, 201.45076119880514, 
37.33075144836992, -5.414133920546201, 626.021870494625, 0.8900164545386673], 
"eval_len": [20, 37, 314, 21, 15, 162, 41, 25, 224, 11]}

 25%|██▍       | 249997/1000000 [2:45:35<6:30:34, 32.00it/s]global step 250000, trans_decision ep_re 235.81918610024232

{"global_step": 250000, "eval_re": [-2.0277188452855395, 26.85840096226048, 
855.227432080316, 22.461772521112124, 32.99012458162339, 79.5951338550726, 
152.3761870245624, -0.16554627316943854, -2.417673524730372, 
1193.2937486206617], "eval_len": [13, 38, 353, 37, 55, 88, 109, 12, 15, 405]}

 26%|██▌       | 259999/1000000 [2:52:23<6:25:29, 31.99it/s]global step 260000, trans_decision ep_re 103.04337264056753

{"global_step": 260000, "eval_re": [2.811409087424855, 237.37285829676247, 
1.4644363155564994, -1.8581741598498913, 1.670994560437503, -2.256469322744583, 
12.729068289560086, 604.3676858919434, 168.36535424176927, 5.766563204815646], 
"eval_len": [19, 123, 17, 14, 14, 18, 36, 233, 107, 17]}

 27%|██▋       | 269999/1000000 [2:59:20<6:20:05, 32.01it/s]global step 270000, trans_decision ep_re 194.4677116993974

{"global_step": 270000, "eval_re": [45.73880102351573, 927.5803748532901, 
4.485552584603774, 4.835873595514361, 123.59792518283467, 8.353557465212033, 
2.8347557730687094, 73.05235899362391, 752.6636275215076, 1.5342900008030464], 
"eval_len": [64, 341, 14, 18, 92, 19, 14, 125, 292, 19]}

 28%|██▊       | 279997/1000000 [3:05:56<6:13:16, 32.15it/s]global step 280000, trans_decision ep_re 35.83147979629124

{"global_step": 280000, "eval_re": [2.961252668093714, 0.37347330089326125, 
3.099463532379413, 343.1399701447184, 0.864887876030367, -1.9116414346392825, 
-0.5763841443312119, -1.845070468830639, 6.831930973943928, 5.3769155146544625],
"eval_len": [12, 13, 20, 166, 11, 25, 17, 11, 20, 16]}

 29%|██▉       | 289997/1000000 [3:12:38<6:09:39, 32.01it/s]global step 290000, trans_decision ep_re 122.01063895911716

{"global_step": 290000, "eval_re": [8.058957967453951, 274.8079497126337, 
5.1000874496753825, -0.82708788145165, -2.8171809063119317, -1.6728972608112103,
5.034340074111759, 199.03078222478987, 468.5318907152333, 264.8595474958484], 
"eval_len": [21, 162, 19, 12, 17, 9, 19, 105, 213, 146]}

 30%|██▉       | 299996/1000000 [3:19:30<5:56:08, 32.76it/s]global step 300000, trans_decision ep_re 368.54070860191865

{"global_step": 300000, "eval_re": [4.03538843961258, 3.3544655782098833, 
684.5084604845892, 566.6505102203231, 508.82695946433734, 0.15086623246448227, 
5.03984451415569, 15.278371092472241, 528.0176585733001, 1369.5445614197215], 
"eval_len": [18, 22, 261, 233, 207, 13, 20, 37, 209, 431]}

 31%|███       | 309997/1000000 [3:26:04<6:03:31, 31.63it/s]global step 310000, trans_decision ep_re 32.15232996117918

{"global_step": 310000, "eval_re": [-2.838808489420575, -3.9883107638146527, 
1.0422125129418527, 5.810011035932619, 4.263585023633476, 2.5485714122558365, 
207.3750202122832, 6.794772445390954, 97.22735515470426, 3.2888910678847805], 
"eval_len": [21, 17, 11, 17, 21, 12, 106, 17, 104, 13]}

 32%|███▏      | 319997/1000000 [3:32:50<6:00:00, 31.48it/s]global step 320000, trans_decision ep_re 337.6768188526174

{"global_step": 320000, "eval_re": [1.0357025734607739, 6.784756585578299, 
1235.9561601488113, 5.821747219131316, 5.354049027706288, 1722.7431540018347, 
184.77943854887383, 219.59064849442856, -0.06857403036267998, 
-5.22889404328926], "eval_len": [14, 17, 435, 19, 21, 531, 134, 123, 18, 26]}

 33%|███▎      | 329999/1000000 [3:39:43<5:51:24, 31.78it/s]global step 330000, trans_decision ep_re 339.6689244496076

{"global_step": 330000, "eval_re": [-7.575225697258376, 1.4622139966919268, 
2522.975671716536, 210.87864624798792, 6.402492255521645, 3.382810661768611, 
0.5243597368147246, 10.370847262820917, 4.287008727079123, 643.9804195881133], 
"eval_len": [88, 11, 775, 148, 22, 30, 19, 34, 17, 288]}

 34%|███▍      | 339997/1000000 [3:46:31<5:45:13, 31.86it/s]global step 340000, trans_decision ep_re 138.59645367660417

{"global_step": 340000, "eval_re": [-0.0007573749482454387, 538.5378315679228, 
3.1237219864557955, 4.8867846602627525, 18.51702856953416, -0.9943955420128323, 
3.8888114831922778, 489.7855518305677, 325.6556626109297, 2.564296974137286], 
"eval_len": [10, 199, 19, 16, 37, 13, 17, 239, 166, 14]}

 35%|███▍      | 349999/1000000 [3:53:30<5:41:01, 31.77it/s]global step 350000, trans_decision ep_re 250.17278842557653

{"global_step": 350000, "eval_re": [200.20987136086924, 6.704558802059267, 
2.2324077547065664, 26.510298348220115, 546.9095438669815, 1.7714875859107466, 
3.0650239057589923, 2.8228191413823196, 1708.0819383435487, 3.419935146327642], 
"eval_len": [135, 18, 15, 83, 224, 15, 14, 19, 567, 21]}

 36%|███▌      | 359997/1000000 [4:00:05<5:31:49, 32.15it/s]global step 360000, trans_decision ep_re 200.34900576001877

{"global_step": 360000, "eval_re": [293.05280675776, -1.5206241003436463, 
2.9841414706155174, 34.37294680783049, 4.2205770560120195, 1048.843306806371, 
0.006861188960622822, 104.88744042213729, 29.126683747283913, 
487.5159174435605], "eval_len": [141, 18, 13, 119, 17, 347, 12, 78, 49, 214]}

 37%|███▋      | 369999/1000000 [4:06:53<5:28:56, 31.92it/s]global step 370000, trans_decision ep_re 262.2516574558926

{"global_step": 370000, "eval_re": [1445.8855172536576, 3.758191024492494, 
339.20924238345117, 5.71863709229173, 2.6936193938727344, 813.0899912032475, 
-2.8302753959222446, 8.158614788207807, 3.2364008509586535, 3.596635964668742], 
"eval_len": [527, 14, 164, 20, 19, 300, 15, 17, 18, 19]}

 38%|███▊      | 379997/1000000 [4:13:42<5:27:59, 31.51it/s]global step 380000, trans_decision ep_re 222.76987315225443

{"global_step": 380000, "eval_re": [-2.0567573223057867, 1321.900696886587, 
3.8184868581383458, 422.9953384212996, 331.3198202941101, 48.585734833780094, 
38.69721202895091, -1.111277352884589, 58.797591822416926, 4.751885052451743], 
"eval_len": [25, 439, 20, 188, 147, 53, 50, 20, 69, 19]}

 39%|███▉      | 389999/1000000 [4:20:34<5:27:08, 31.08it/s]global step 390000, trans_decision ep_re 64.42908400756156

{"global_step": 390000, "eval_re": [1.7286932104955888, 53.7767735124653, 
5.228412964604972, 1.548082718144471, 212.3588850398262, 358.81073054168434, 
2.9359825750811304, 1.3559148367200657, -1.9349536707308397, 8.482318347324378],
"eval_len": [17, 53, 28, 33, 125, 217, 21, 13, 25, 21]}

 40%|███▉      | 399997/1000000 [4:27:21<5:14:43, 31.77it/s]global step 400000, trans_decision ep_re 363.52064825914556

{"global_step": 400000, "eval_re": [-1.375160296289544, 975.199345718313, 
3.5600709607721717, 1823.6009658671405, 295.22094525248116, 5.420394512464129, 
11.19504216934547, 365.51199336375913, 128.78343357185793, 28.08945147161178], 
"eval_len": [12, 308, 61, 553, 150, 16, 32, 155, 270, 40]}

 41%|████      | 409999/1000000 [4:34:12<5:11:59, 31.52it/s]global step 410000, trans_decision ep_re 237.23547927555086

{"global_step": 410000, "eval_re": [-1.7298544346410822, 1220.9413094617325, 
659.8905190045374, -1.4882493918668591, 455.416562635305, -2.6921328391233295, 
2.4277534450441127, 4.1338962103568715, 5.733075925163954, 29.721912739000107], 
"eval_len": [15, 407, 260, 13, 208, 18, 17, 22, 19, 47]}

 42%|████▏     | 419997/1000000 [4:41:00<5:03:05, 31.89it/s]global step 420000, trans_decision ep_re 182.35923745192184

{"global_step": 420000, "eval_re": [3.465694286871789, 1129.3730721311242, 
16.33240799881829, 6.698830500983124, 253.8222480838695, -0.567507804284805, 
30.482522239807512, -0.8727439640076469, 1.892868425805643, 382.9649826202309], 
"eval_len": [16, 405, 48, 18, 210, 13, 80, 19, 18, 154]}

 43%|████▎     | 429999/1000000 [4:47:47<4:56:24, 32.05it/s]global step 430000, trans_decision ep_re 158.11851728825258

{"global_step": 430000, "eval_re": [631.95596819169, 2.3753562229041503, 
1.9353174887879403, 8.87616703381115, -3.92716006655076, 1.536919406130538, 
-2.2372008095644604, 2.680864501642592, 2.655312096788356, 935.3336288168864], 
"eval_len": [247, 68, 22, 20, 13, 17, 20, 14, 14, 327]}

 44%|████▍     | 439997/1000000 [4:54:31<4:45:50, 32.65it/s]global step 440000, trans_decision ep_re 523.1113230556109

{"global_step": 440000, "eval_re": [489.3336710119188, 901.6886084726914, 
2.201991469185579, 176.86370645461898, 7.394645348523215, 249.8428303258478, 
1028.4297535884905, 2195.754514602268, 184.023354844906, -4.419845562340813], 
"eval_len": [216, 369, 21, 123, 16, 132, 366, 703, 130, 15]}

 45%|████▍     | 449999/1000000 [5:01:22<4:49:39, 31.65it/s]global step 450000, trans_decision ep_re 70.02549572178546

{"global_step": 450000, "eval_re": [1.4891762876038324, 5.811174825236417, 
3.5551683772458245, 4.185212968180784, 143.09313889093335, 1.659953723422656, 
1.9965701637719269, 211.72636584495334, 58.46982039428957, 268.26837574221685], 
"eval_len": [18, 19, 17, 17, 73, 11, 19, 117, 245, 128]}

 46%|████▌     | 459997/1000000 [5:08:20<4:40:50, 32.05it/s]global step 460000, trans_decision ep_re 282.31077866792805

{"global_step": 460000, "eval_re": [867.4559164271067, 7.352927409775216, 
7.653232716094324, 1282.9942015331774, 2.0819565965010467, 4.231449313881088, 
4.188758983042986, 0.8896210952207191, 505.1786762278375, 141.0810463766435], 
"eval_len": [328, 18, 21, 450, 16, 31, 18, 11, 194, 97]}

 47%|████▋     | 469997/1000000 [5:15:10<4:41:50, 31.34it/s]global step 470000, trans_decision ep_re 134.7311649782558

{"global_step": 470000, "eval_re": [133.1755204671861, -2.7791909932404657, 
-6.978658637824433, 209.34538347760673, 5.791238237442341, 95.70515180126405, 
1.991265108267894, 0.4445853528778835, 1.9924927066664937, 908.6238622623114], 
"eval_len": [280, 12, 12, 92, 20, 83, 16, 14, 14, 321]}

 48%|████▊     | 479997/1000000 [5:21:47<4:31:56, 31.87it/s]global step 480000, trans_decision ep_re 194.91001650904408

{"global_step": 480000, "eval_re": [5.019298155306833, 1148.12475167126, 
1.1038929299303541, 10.12047232110136, 5.286543013953448, 5.7197368244675175, 
2.326059743428591, 16.18059896819414, 171.01351998392582, 584.2052914788729], 
"eval_len": [19, 424, 14, 64, 18, 19, 20, 26, 114, 263]}

 49%|████▉     | 489999/1000000 [5:28:50<4:28:18, 31.68it/s]global step 490000, trans_decision ep_re 539.719516398329

{"global_step": 490000, "eval_re": [1.971047172260345, 552.7874205464159, 
30.179378163277576, 539.2226891148274, 227.65791357110396, 3.7175811722261156, 
1298.5948389264158, 1257.9672459420804, 76.81466427767967, 1408.2823850970037], 
"eval_len": [16, 225, 56, 209, 154, 13, 486, 406, 165, 481]}

 50%|████▉     | 499997/1000000 [5:35:26<4:18:14, 32.27it/s]global step 500000, trans_decision ep_re 416.83601213517807

{"global_step": 500000, "eval_re": [4.235073982626656, 982.4134123383417, 
2.6852474920545437, -1.5338893125407662, 5.5623549654030455, 132.9681065783513, 
3035.99691884168, 1.7723824012759597, 0.34943474610787184, 3.9110793184802795], 
"eval_len": [16, 339, 20, 18, 16, 100, 914, 12, 16, 16]}

 51%|█████     | 509999/1000000 [5:42:15<4:14:59, 32.03it/s]global step 510000, trans_decision ep_re 285.73676453089763

{"global_step": 510000, "eval_re": [5.240751561124157, 76.44256177611257, 
3.1386228503842877, 372.49780857653417, 228.59808857250957, 9.243048793292298, 
6.241042604773977, 7.617195096322995, 8.95111393342697, 2139.3974115444953], 
"eval_len": [20, 74, 17, 149, 156, 39, 18, 21, 31, 586]}

 52%|█████▏    | 519997/1000000 [5:49:04<4:13:25, 31.57it/s]global step 520000, trans_decision ep_re 168.32746123115606

{"global_step": 520000, "eval_re": [-1.4831522539713846, 379.25014518536557, 
213.4345883350589, 4.420033099310749, 256.5587241825279, 3.903720222511167, 
461.3275603620113, 4.775726112202834, 356.0296043771461, 5.0576626893977785], 
"eval_len": [14, 163, 183, 19, 211, 16, 215, 18, 158, 19]}

 53%|█████▎    | 529999/1000000 [5:55:54<4:09:09, 31.44it/s]global step 530000, trans_decision ep_re 637.5220763248016

{"global_step": 530000, "eval_re": [10.066089134661594, 0.5257270003995019, 
5.784489596663843, 174.95570807664203, 1.3488498309309984, 3619.908063028811, 
6.871239199737626, 103.13592223723104, 2451.9924621396217, 0.6322130033163755], 
"eval_len": [20, 12, 17, 119, 15, 962, 19, 98, 725, 13]}

 54%|█████▍    | 539999/1000000 [6:03:00<4:01:03, 31.80it/s]global step 540000, trans_decision ep_re 341.9002261083967

{"global_step": 540000, "eval_re": [155.28991574915545, 562.3610354179605, 
5.348980514989028, 169.29496092786977, -0.16799541013195807, 3.394846045267391, 
1.6718953560623524, 1810.221827410677, 8.953237390973651, 702.6335576811441], 
"eval_len": [80, 227, 15, 90, 19, 13, 19, 630, 22, 256]}

 55%|█████▍    | 549997/1000000 [6:09:50<3:55:07, 31.90it/s]global step 550000, trans_decision ep_re 207.0583558332471

{"global_step": 550000, "eval_re": [1154.3116662813725, 523.8630797395233, 
152.63202264793472, -1.3688665513240195, 3.3932066451951677, 150.41064461282105,
3.1424705172574425, 4.231242819192678, 78.80998651495304, 1.1581051055455336], 
"eval_len": [378, 210, 77, 17, 15, 120, 18, 14, 66, 17]}

 56%|█████▌    | 559997/1000000 [6:16:27<3:51:23, 31.69it/s]global step 560000, trans_decision ep_re 242.49610479739334

{"global_step": 560000, "eval_re": [0.2652326169818493, 2.900848608040171, 
-1.5921457925214886, 2.3786613061931896, 1.6902860208995687, 2327.669333378426, 
3.76962000057435, 30.303474563229184, 8.18269943022499, 49.39303784188584], 
"eval_len": [12, 13, 12, 13, 15, 663, 19, 50, 21, 52]}

 57%|█████▋    | 569999/1000000 [6:23:15<3:45:00, 31.85it/s]global step 570000, trans_decision ep_re 87.98525838002327

{"global_step": 570000, "eval_re": [3.6396257710312225, 2.5374334795692617, 
225.72963990061064, 11.336344906905993, 8.119156970653606, 11.792162633605964, 
6.171008025606523, 603.4701218394761, -2.3459802595442056, 9.403070532317592], 
"eval_len": [18, 15, 139, 24, 22, 47, 23, 285, 12, 21]}

 58%|█████▊    | 579997/1000000 [6:30:01<3:41:01, 31.67it/s]global step 580000, trans_decision ep_re 352.790054925898

{"global_step": 580000, "eval_re": [2197.961773381336, 3.7227090834277368, 
4.461340841149076, -1.9501861555729783, 1196.6442862285476, 2.9230311297229763, 
9.389002361051602, 122.08010083613341, -3.633424215079978, -3.6980842317352742],
"eval_len": [615, 29, 21, 15, 401, 18, 18, 120, 16, 29]}

 59%|█████▉    | 589997/1000000 [6:37:00<3:32:32, 32.15it/s]global step 590000, trans_decision ep_re 203.2118746785231

{"global_step": 590000, "eval_re": [3.97037182230067, 1.4475661779224411, 
838.808880689469, 15.370019581045451, 3.4056086480496566, 290.87296453598253, 
1.5246589643913582, 763.8376961782976, 108.23462451337835, 4.646355674394001], 
"eval_len": [29, 13, 326, 65, 18, 162, 22, 255, 96, 17]}

 60%|█████▉    | 599997/1000000 [6:43:32<3:28:21, 32.00it/s]global step 600000, trans_decision ep_re 70.26071437905587

{"global_step": 600000, "eval_re": [1.6978185417710205, -3.0331772353047617, 
0.3019067326841935, 272.1168069842102, 6.0954247657414005, 187.9493009156469, 
234.56203306660223, 6.068530477676885, 2.53370729107687, -5.6852077495462785], 
"eval_len": [14, 16, 10, 166, 21, 103, 135, 18, 12, 18]}

 61%|██████    | 609997/1000000 [6:50:15<3:20:35, 32.40it/s]global step 610000, trans_decision ep_re 86.0953843226194

{"global_step": 610000, "eval_re": [137.12357371324785, 44.0023337867126, 
1.2037159340302714, 658.0437477317246, 10.76187563167505, 7.925226105303782, 
-0.12816313703237053, 1.4476832097175176, 0.531308974686758, 
0.04254127612783421], "eval_len": [102, 95, 13, 254, 22, 18, 16, 21, 17, 17]}

 62%|██████▏   | 619997/1000000 [6:57:10<3:16:24, 32.24it/s]global step 620000, trans_decision ep_re 373.59821311623097

{"global_step": 620000, "eval_re": [124.58641503266365, 866.2559812508756, 
5.6531413682089875, 1256.651149621538, -1.770316521600056, 0.6104006274464463, 
146.07349335914714, 1216.4407852168035, 122.816675118037, -1.3355939108107906], 
"eval_len": [84, 322, 20, 383, 13, 17, 101, 382, 94, 27]}

 63%|██████▎   | 629997/1000000 [7:03:44<3:13:17, 31.90it/s]global step 630000, trans_decision ep_re 566.4669754621057

{"global_step": 630000, "eval_re": [134.36292233383762, 0.5803451269179984, 
2269.1195365827116, 1.9473451608478056, -0.22403821046678007, 
0.4887125250232016, 604.6392442241337, -0.9385570743551572, 2649.436581879665, 
5.257662072742717], "eval_len": [136, 14, 669, 16, 10, 17, 244, 16, 871, 23]}

 64%|██████▍   | 639999/1000000 [7:10:31<3:04:57, 32.44it/s]global step 640000, trans_decision ep_re 193.65183100137085

{"global_step": 640000, "eval_re": [2.084817662826929, 1212.1409550711726, 
128.62365847883927, 0.9362940868362575, 15.676134492787881, 147.8873708207491, 
-1.840821961839036, 0.35032442923218443, 426.4830317418885, 4.1765451912148155],
"eval_len": [11, 407, 131, 17, 43, 116, 19, 17, 212, 15]}

 65%|██████▍   | 649998/1000000 [7:17:15<3:02:36, 31.95it/s]global step 650000, trans_decision ep_re 167.0325182509809

{"global_step": 650000, "eval_re": [495.70148390934753, 5.098091600380135, 
747.2894193224158, 6.424966222010223, 4.074991853196518, 5.22065684664067, 
4.946250435388722, 3.644256445899767, 235.10515722049198, 162.81990865403785], 
"eval_len": [286, 18, 390, 18, 23, 16, 30, 31, 118, 143]}

 66%|██████▌   | 659997/1000000 [7:24:10<2:53:45, 32.61it/s]global step 660000, trans_decision ep_re 125.13925103260706

{"global_step": 660000, "eval_re": [0.7134354444493802, 26.941948487778543, 
160.5319145857406, 7.269857618170563, 8.47468469556949, 2.4672964673314803, 
6.937204196691128, 4.063942235519009, 985.8118637436055, 48.18036285121501], 
"eval_len": [102, 56, 111, 20, 20, 16, 22, 15, 391, 58]}

 67%|██████▋   | 669997/1000000 [7:30:43<2:51:46, 32.02it/s]global step 670000, trans_decision ep_re 92.49432886619144

{"global_step": 670000, "eval_re": [1.5166109508887173, 11.75010916804456, 
1.7905792825872768, 711.6321583423027, 1.381887832286282, 21.11200690490261, 
130.08178509330045, 7.833226198393181, 36.77309977787021, 1.071825111338488], 
"eval_len": [14, 38, 15, 282, 12, 45, 92, 16, 46, 13]}

 68%|██████▊   | 679999/1000000 [7:37:25<2:45:40, 32.19it/s]global step 680000, trans_decision ep_re 340.9016535882457

{"global_step": 680000, "eval_re": [6.641649799180533, 5.358351091436617, 
3.8050163490026123, 48.39579033285667, 3.9805713747004785, -0.30256441341482626,
1843.6882111424682, 4.407200877198663, 5.911103311030287, 1487.1312060179982], 
"eval_len": [19, 18, 15, 81, 18, 20, 570, 14, 19, 493]}

 69%|██████▉   | 689997/1000000 [7:44:10<2:39:47, 32.34it/s]global step 690000, trans_decision ep_re 284.22428288105027

{"global_step": 690000, "eval_re": [0.37048818350078283, 6.399908053121917, 
68.41863555682926, 1.5709269696819517, 999.9185226134936, 761.4311633780613, 
9.802895672375442, 697.6343422445971, 30.14081880583911, 266.55512733300185], 
"eval_len": [14, 33, 177, 22, 314, 322, 20, 287, 67, 131]}

 70%|██████▉   | 699999/1000000 [7:50:55<2:35:30, 32.15it/s]global step 700000, trans_decision ep_re 486.33195360544113

{"global_step": 700000, "eval_re": [2.2376897156967988, 8.430068404766363, 
2258.227233078104, -0.027594809389112962, -3.4857596374591084, 
504.7265795875016, 4.120794139988468, 1844.7860462135534, 1.249010566420137, 
243.05546879522913], "eval_len": [12, 22, 697, 15, 25, 213, 19, 681, 15, 141]}

 71%|███████   | 709998/1000000 [7:57:42<2:28:07, 32.63it/s]global step 710000, trans_decision ep_re 406.4628065511257

{"global_step": 710000, "eval_re": [4.295608090342336, 775.6594190900411, 
2415.1848997561383, 826.2263996624357, 6.83759334633047, -2.0437136399722626, 
21.081225196431113, 6.637614973513806, 5.775806992050578, 4.973212043946421], 
"eval_len": [21, 357, 703, 330, 21, 19, 55, 20, 18, 17]}

 72%|███████▏  | 719997/1000000 [8:04:40<2:24:07, 32.38it/s]global step 720000, trans_decision ep_re 518.7884230465681

{"global_step": 720000, "eval_re": [1312.272670463069, 5.0436491423551155, 
2.389070738695625, 7.586317896939469, 140.0951794796749, 826.1291047282681, 
74.20867038560873, 31.734114102338943, 2621.5941754473065, 166.83127808142444], 
"eval_len": [378, 17, 19, 30, 77, 394, 96, 57, 694, 109]}

 73%|███████▎  | 729997/1000000 [8:11:14<2:19:07, 32.34it/s]global step 730000, trans_decision ep_re 469.85962249509487

{"global_step": 730000, "eval_re": [10.012299750189431, 1554.43677443818, 
1.2729051508697407, 313.4829379798678, 3.3283703344368885, 543.0893209925571, 
4.599321844804067, 0.2840763714387348, 3.6924479376477204, 2264.3977701509575], 
"eval_len": [23, 508, 13, 239, 22, 274, 15, 10, 22, 614]}

 74%|███████▍  | 739999/1000000 [8:18:00<2:14:03, 32.32it/s]global step 740000, trans_decision ep_re 94.02617458230011

{"global_step": 740000, "eval_re": [6.011999769852685, 0.7670745238557561, 
3.9809410616055403, 8.632774044013749, -2.1745694893082463, 4.350557468141396, 
3.309879156443651, 5.358458043125115, 906.6171908346744, 3.4074404105969682], 
"eval_len": [18, 17, 19, 17, 18, 18, 19, 28, 284, 17]}

 75%|███████▍  | 749999/1000000 [8:24:43<2:10:28, 31.93it/s]global step 750000, trans_decision ep_re 214.72946356160455

{"global_step": 750000, "eval_re": [8.655615930665546, 626.5131234402327, 
-3.521996103361097, 0.5642340285239701, 44.98877579148313, 1.7570182415878242, 
6.4433528570594385, 1459.5703260142932, 1.0695728703817218, 1.2546125451791317],
"eval_len": [19, 393, 18, 11, 54, 13, 19, 448, 17, 12]}

 76%|███████▌  | 759998/1000000 [8:31:40<2:04:27, 32.14it/s]global step 760000, trans_decision ep_re 363.3189323232185

{"global_step": 760000, "eval_re": [763.6902156240922, 4.030041818268967, 
172.82673557627052, 193.5357126337638, 555.7757975695358, 6.130193847721189, 
148.93359918029228, 3.3020864734643984, 1784.4277398860131, 0.5372006227631434],
"eval_len": [259, 20, 88, 135, 245, 17, 129, 20, 552, 17]}

 77%|███████▋  | 769997/1000000 [8:38:13<1:59:02, 32.20it/s]global step 770000, trans_decision ep_re 427.7507378266488

{"global_step": 770000, "eval_re": [10.50055981346858, 1.1992142062907871, 
1.4516782897733582, 6.508352672073945, 0.5333939481419757, 5.615359589074773, 
3725.9599660693575, 2.8828639624430017, 2.4337787283456627, 520.422210987518], 
"eval_len": [20, 14, 19, 16, 18, 22, 1000, 13, 19, 235]}

 78%|███████▊  | 779999/1000000 [8:44:58<1:54:20, 32.07it/s]global step 780000, trans_decision ep_re 147.82419913536182

{"global_step": 780000, "eval_re": [7.613221755809764, 1042.6303757307091, 
7.86850596622205, 199.12033651594777, 7.0195846300291755, 3.9978955436658556, 
1.8902533192934754, 2.4875743807136708, -0.42857878938288013, 
206.04282230061042], "eval_len": [18, 309, 20, 98, 35, 17, 15, 14, 17, 131]}

 79%|███████▉  | 789998/1000000 [8:51:41<1:47:55, 32.43it/s]global step 790000, trans_decision ep_re 163.99885613534144

{"global_step": 790000, "eval_re": [3.208142672107479, 631.6488085891024, 
103.31340334059976, 27.312179877823972, -0.3137909165531059, 5.352130258142564, 
22.380566662891546, -2.5067605149478305, -1.0833126845821734, 
850.6771940688297], "eval_len": [19, 246, 100, 51, 19, 16, 43, 19, 12, 290]}

 80%|███████▉  | 799997/1000000 [8:58:24<1:43:03, 32.34it/s]global step 800000, trans_decision ep_re 268.94236227240185

{"global_step": 800000, "eval_re": [237.33811058889685, -0.3605791061202422, 
7.396555676807534, 1067.2474279909645, 145.2210457974483, -0.8427078064464253, 
772.9841755246971, 35.54303047121884, 3.149505377057057, 421.74705820949487], 
"eval_len": [172, 16, 20, 359, 131, 10, 268, 52, 13, 186]}

 81%|████████  | 809999/1000000 [9:05:20<1:38:03, 32.30it/s]global step 810000, trans_decision ep_re 274.80551118217016

{"global_step": 810000, "eval_re": [1.704606831676938, -0.0365762986334601, 
873.635135743232, 1194.9748993758287, 0.4538709184421022, 213.86718516139123, 
365.82339826868974, 6.616734903224535, 88.41676717879163, 2.599089739058206], 
"eval_len": [17, 19, 325, 367, 15, 153, 211, 18, 100, 14]}

 82%|████████▏ | 819996/1000000 [9:11:53<1:35:11, 31.52it/s]global step 820000, trans_decision ep_re 230.66907902706572

{"global_step": 820000, "eval_re": [6.6544803774559425, 192.2035276331583, 
0.15386009346097373, 5.160576246115367, 5.522438818114916, 13.860483270605883, 
7.0774459259874565, 567.8978903564473, 22.090915279452382, 1486.0691722698584], 
"eval_len": [20, 108, 15, 23, 18, 47, 18, 206, 37, 478]}

 83%|████████▎ | 829997/1000000 [9:18:37<1:28:24, 32.05it/s]global step 830000, trans_decision ep_re 132.15892276680188

{"global_step": 830000, "eval_re": [2.147624408103405, 5.478417016788719, 
4.831762431871856, 11.80323014287627, 1282.2875188726327, -4.7173984169880745, 
10.395320158153918, 6.369648803624385, -1.0484458192194703, 4.041550070175079], 
"eval_len": [18, 17, 19, 37, 344, 20, 20, 18, 12, 15]}

 84%|████████▍ | 839997/1000000 [9:25:16<1:21:35, 32.68it/s]global step 840000, trans_decision ep_re 136.63970075055096

{"global_step": 840000, "eval_re": [4.40683578676701, -1.365972057579372, 
747.942437883266, 195.68895287818057, 7.460535326043986, 122.5532369876455, 
-3.3166292281943286, 254.82537647428111, 39.11659538140407, 
-0.9143619263050737], "eval_len": [17, 17, 326, 138, 36, 214, 15, 136, 54, 13]}

 85%|████████▍ | 849998/1000000 [9:31:56<1:17:17, 32.35it/s]global step 850000, trans_decision ep_re 233.64724678615593

{"global_step": 850000, "eval_re": [5.197114552268907, 379.74160567181815, 
184.66389292433686, 0.026701864749263365, 483.38355674898975, 
130.23083811041505, 2.160193367003646, 222.08841041692597, 3.9955976763346803, 
924.9845565287173], "eval_len": [17, 318, 134, 19, 231, 84, 17, 124, 29, 330]}

 86%|████████▌ | 859997/1000000 [9:38:37<1:12:16, 32.29it/s]global step 860000, trans_decision ep_re 240.4645073631119

{"global_step": 860000, "eval_re": [282.8192131267552, 176.20473108180892, 
6.959982159729594, 1334.245322152816, 463.0948787892455, 2.658756622816727, 
6.494377056132089, -0.7372936585068572, 132.26615221664446, 0.6389540836770105],
"eval_len": [148, 133, 19, 383, 184, 21, 31, 11, 97, 17]}

 87%|████████▋ | 869996/1000000 [9:45:18<1:06:26, 32.61it/s]global step 870000, trans_decision ep_re 231.03055416914876

{"global_step": 870000, "eval_re": [1.696289994890678, -0.04768761550174469, 
3.207693390124913, 1.4685513919882793, 224.02472887647673, 1948.3608610305166, 
116.27862467029242, 0.3272177374377629, 9.01384821777558, 5.975413997487151], 
"eval_len": [13, 18, 18, 13, 131, 662, 84, 29, 18, 18]}

 88%|████████▊ | 879999/1000000 [9:51:57<1:00:30, 33.06it/s]global step 880000, trans_decision ep_re 185.88016620407183

{"global_step": 880000, "eval_re": [-0.1691955512020326, -0.3513649783129017, 
0.3853631715053837, 2.221453542472266, 1146.0651272130497, 3.0205902195025547, 
131.00110167800042, 379.3653995476221, 192.23709094354976, 5.026096254531314], 
"eval_len": [9, 11, 17, 20, 384, 19, 112, 171, 86, 15]}

 89%|████████▉ | 889998/1000000 [9:58:36<56:02, 32.72it/s]global step 890000, trans_decision ep_re 193.79094550038707

{"global_step": 890000, "eval_re": [0.8678855705853399, -4.8374085765336385, 
1415.3590210209666, 1.9029657375489197, 4.587366383936421, 4.065961695380697, 
308.0908367119912, 77.41279255000208, -1.7318579560753682, 132.19189186606886], 
"eval_len": [14, 26, 496, 16, 14, 20, 170, 97, 12, 88]}

 90%|████████▉ | 899997/1000000 [10:05:16<51:37, 32.29it/s]global step 900000, trans_decision ep_re 225.71023717433394

{"global_step": 900000, "eval_re": [2.5999903036641037, -0.3180594534925566, 
0.19023964303638025, 221.81496538343217, 298.4034270092501, -28.072947010382414,
3.5339979392934535, 0.5991249312111491, 450.9213071241608, 1307.4303258731663], 
"eval_len": [15, 11, 12, 123, 217, 87, 17, 11, 214, 359]}

 91%|█████████ | 909996/1000000 [10:11:56<45:53, 32.68it/s]global step 910000, trans_decision ep_re 153.69906705371994

{"global_step": 910000, "eval_re": [59.25061334850443, 974.5495055179902, 
-0.13279731528313443, 7.634643912332756, 4.691928048064444, 9.862234541352949, 
14.404614559273917, 4.600373919082005, 368.1322264493617, 93.99732755652047], 
"eval_len": [85, 294, 19, 18, 34, 34, 120, 17, 253, 103]}

 92%|█████████▏| 919998/1000000 [10:18:35<40:38, 32.80it/s]global step 920000, trans_decision ep_re 279.77494576244516

{"global_step": 920000, "eval_re": [8.50847137473649, 571.3576030032503, 
85.88256849479757, 0.5310726411562028, 919.6522159795121, 5.463383340390232, 
403.0588779408539, -27.794135982870227, 827.7657742880676, 3.323626544557455], 
"eval_len": [20, 237, 72, 13, 319, 15, 202, 167, 272, 17]}

 93%|█████████▎| 929997/1000000 [10:25:16<35:39, 32.72it/s]global step 930000, trans_decision ep_re 57.86937441921824

{"global_step": 930000, "eval_re": [32.91813505506737, -5.461344535515321, 
435.83782828359404, 5.64918024442258, 3.76855179808763, 2.367658982740692, 
15.416681846643247, 4.137371914992895, 86.16635973271052, -2.1066791305611288], 
"eval_len": [64, 17, 225, 18, 18, 14, 122, 17, 91, 10]}

 94%|█████████▍| 939997/1000000 [10:31:54<31:00, 32.25it/s]global step 940000, trans_decision ep_re 199.1657484317828

{"global_step": 940000, "eval_re": [12.240977700803349, 2.1095399641931265, 
19.978556861842907, 987.8272353318854, 6.00538324538107, 4.308194164898581, 
-0.07616207195001151, -1.0246076767322945, 960.739163106267, 
-0.45079630876127985], "eval_len": [30, 21, 43, 356, 21, 19, 11, 11, 330, 21]}

 95%|█████████▍| 949996/1000000 [10:38:32<25:23, 32.83it/s]global step 950000, trans_decision ep_re 211.45359172692469

{"global_step": 950000, "eval_re": [145.14214983375138, 4.710090554930315, 
194.4375396090544, 189.70023247533717, 827.0003646231211, 207.4802028211659, 
0.5315841355819659, 221.81190582517507, 2.408190215548802, 321.3136571755812], 
"eval_len": [96, 14, 106, 120, 289, 178, 11, 161, 19, 163]}

 96%|█████████▌| 959999/1000000 [10:45:13<20:20, 32.78it/s]global step 960000, trans_decision ep_re 249.12270209648682

{"global_step": 960000, "eval_re": [23.802602308702642, 241.1116256484332, 
1.2469563448602157, 9.95437708373426, 0.9820155454265361, 1320.7879993477204, 
1.245281927611295, 735.3711533902832, 148.9624452905004, 7.762564077596103], 
"eval_len": [59, 152, 20, 19, 15, 392, 17, 263, 109, 20]}

 97%|█████████▋| 969997/1000000 [10:51:54<15:19, 32.64it/s]global step 970000, trans_decision ep_re 61.591377073973625

{"global_step": 970000, "eval_re": [-1.1054723533949184, 1.1580691781041952, 
139.92786761764762, 7.002499162125512, 461.51941297113626, 1.9288078143834593, 
-0.27757556937827615, -1.3302051438287825, -0.8145660907020787, 
7.904933153643272], "eval_len": [11, 14, 79, 20, 259, 18, 11, 17, 11, 20]}

 98%|█████████▊| 979997/1000000 [10:58:32<10:10, 32.79it/s]global step 980000, trans_decision ep_re 82.0157349314695

{"global_step": 980000, "eval_re": [5.462392927040349, 147.85997583080652, 
16.249687115087784, 1.9563972967458179, 166.3081903036447, -1.2668216715322347, 
138.45525570323463, 305.16221867561893, 36.280793498836196, 3.689259635212183], 
"eval_len": [35, 119, 39, 17, 106, 32, 75, 198, 109, 16]}

 99%|█████████▉| 989996/1000000 [11:05:12<05:06, 32.64it/s]global step 990000, trans_decision ep_re 220.42065754044364

{"global_step": 990000, "eval_re": [1814.2057159840342, -1.4280954162055373, 
0.1342740898463227, 269.2622578355438, 4.453534541341518, 7.486412441442585, 
2.2966639460571625, 109.14498769015353, -1.173445703706816, 
-0.1757300040705747], "eval_len": [496, 17, 14, 150, 32, 16, 16, 71, 15, 14]}

100%|█████████▉| 999999/1000000 [11:11:51<00:00, 32.66it/s]global step 1000000, trans_decision ep_re 256.2078146823321

{"global_step": 1000000, "eval_re": [4.390350028487729, 986.7050230438468, 
157.61716278172725, 557.0553644331403, 213.50030933485874, 4.430469011132272, 
323.3049052570839, 307.23637717863517, 2.499349458564564, 5.3388362958438424], 
"eval_len": [21, 393, 109, 239, 135, 15, 156, 179, 13, 22]}

100%|██████████| 1000000/1000000 [11:12:04<00:00, 24.80it/s]
