
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.05
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [05:20<12:48:00, 21.48it/s]global step 10000, trans_decision ep_re 179.22589000256988

{"global_step": 10000, "eval_re": [24.117949311192913, 341.8587733005101, 
241.88262328798024, 184.69680426156586, 233.53581620529084, 258.2491305856292, 
22.266368648547036, 20.639849708568995, 220.19773457282074, 244.813850143593], 
"eval_len": [47, 222, 139, 107, 132, 150, 33, 36, 126, 134]}

  2%|▏         | 19998/1000000 [16:00<12:42:22, 21.42it/s]global step 20000, trans_decision ep_re 138.52285046127344

{"global_step": 20000, "eval_re": [594.1842357582395, 143.57204231293255, 
27.113849473985205, 245.3255147819472, 24.517560220385235, 23.269569144880204, 
247.10983311906952, 27.27616748657502, 26.51877767231756, 26.340954642402284], 
"eval_len": [583, 113, 38, 137, 35, 38, 278, 40, 40, 36]}

  3%|▎         | 29999/1000000 [26:40<12:45:54, 21.11it/s]global step 30000, trans_decision ep_re 41.57724054598309

{"global_step": 30000, "eval_re": [25.4617815724811, 25.90963969029464, 
18.866189953914372, 23.12639761850287, 140.77683247730155, 60.75175223329542, 
51.855779031350096, 19.983879882141174, 21.696888930869253, 27.343264069680426],
"eval_len": [39, 36, 30, 37, 113, 63, 52, 29, 36, 39]}

  4%|▍         | 39999/1000000 [37:10<12:31:12, 21.30it/s]global step 40000, trans_decision ep_re 73.36889015669314

{"global_step": 40000, "eval_re": [207.98706017766034, 12.578116111462341, 
21.93857723572508, 21.14816724207821, 111.17999626618224, 207.740454960086, 
22.978402846326027, 87.15439835400221, 14.892539967570146, 26.091188405838803], 
"eval_len": [115, 23, 33, 37, 120, 128, 33, 99, 34, 41]}

  5%|▍         | 49999/1000000 [47:50<12:26:31, 21.21it/s]global step 50000, trans_decision ep_re 166.3389591441685

{"global_step": 50000, "eval_re": [138.03894365006838, 168.1474409201265, 
231.39905512257556, 18.468579500084964, 188.69553351819584, 229.8583890318411, 
16.9263852630739, 185.35881786041358, 275.4666043201621, 211.02984225514322], 
"eval_len": [93, 100, 121, 39, 109, 136, 35, 200, 174, 143]}

  6%|▌         | 59999/1000000 [58:10<12:19:02, 21.20it/s]global step 60000, trans_decision ep_re 98.11346657853187

{"global_step": 60000, "eval_re": [25.795318702356916, 248.7977990208382, 
15.796946577124517, 24.61460846153787, 397.1953501899097, 17.336971996747373, 
91.67124572167953, 26.147081533434292, 118.23092377879848, 15.548419802891903], 
"eval_len": [37, 156, 31, 38, 190, 34, 125, 40, 107, 29]}

  7%|▋         | 69998/1000000 [1:09:00<11:57:33, 21.60it/s]global step 70000, trans_decision ep_re 96.3805578625867

{"global_step": 70000, "eval_re": [151.03319093888646, 24.14170934984563, 
24.63136205159222, 30.60465769287025, 18.878447724798715, 20.064285520725743, 
28.791562705318523, 328.1127276848699, 152.14563049660492, 185.40200446035453], 
"eval_len": [93, 34, 36, 39, 32, 36, 37, 178, 93, 121]}

  8%|▊         | 79997/1000000 [1:19:30<12:06:09, 21.12it/s]global step 80000, trans_decision ep_re 51.92831630144046

{"global_step": 80000, "eval_re": [27.550149317302758, 19.614760614982337, 
22.466203687476128, 16.653483913275686, 134.5939056784636, 14.968739305080174, 
113.46619567298359, 28.953433388437976, 122.54584351912602, 18.470447917276335],
"eval_len": [38, 33, 33, 28, 102, 37, 97, 37, 82, 30]}

  9%|▉         | 89999/1000000 [1:30:00<11:56:16, 21.17it/s]global step 90000, trans_decision ep_re 144.48070008145467

{"global_step": 90000, "eval_re": [688.7591873494462, 15.651337025691822, 
272.1771210294502, 12.901955299837336, 132.22860937339556, 25.001046110011973, 
235.34446401855544, 19.171662223751706, 20.89395221731678, 22.677666167089473], 
"eval_len": [295, 27, 146, 28, 94, 36, 132, 32, 31, 34]}

 10%|▉         | 99998/1000000 [1:40:40<11:45:16, 21.27it/s]global step 100000, trans_decision ep_re 274.1819362829779

{"global_step": 100000, "eval_re": [22.237727521470763, 29.18147267821911, 
633.7794749386858, 20.58431651120912, 24.475103006659374, 569.9936486471366, 
247.31356382952563, 329.8459115821191, 475.5580854060918, 388.850058708662], 
"eval_len": [36, 38, 372, 33, 38, 299, 151, 154, 217, 193]}

 11%|█         | 109999/1000000 [1:51:20<11:37:30, 21.27it/s]global step 110000, trans_decision ep_re 224.12424031153833

{"global_step": 110000, "eval_re": [626.8513663175694, 19.458985892611647, 
343.04344752761097, 192.94231197903085, 392.5539684102834, 133.16899109250684, 
189.7897740064016, 22.16543363706425, 178.89325158901383, 142.37487266329043], 
"eval_len": [314, 31, 192, 116, 191, 102, 114, 34, 115, 94]}

 12%|█▏        | 119999/1000000 [2:01:40<11:32:23, 21.18it/s]global step 120000, trans_decision ep_re 102.32043360999594

{"global_step": 120000, "eval_re": [22.965963786005645, 356.72521179326765, 
18.57386250954578, 169.48319967154512, 17.63962753923664, 25.185638782439593, 
23.308197430810967, 21.044619860684698, 345.3757630158927, 22.90225171053068], 
"eval_len": [38, 226, 36, 105, 30, 36, 34, 35, 258, 38]}

 13%|█▎        | 129998/1000000 [2:12:30<11:16:27, 21.44it/s]global step 130000, trans_decision ep_re 153.17997353671126

{"global_step": 130000, "eval_re": [403.5982392798079, 19.859915527448642, 
23.52523464863569, 432.2408118753537, 22.764931224805835, 354.99531948986004, 
19.5277698506654, 24.009153136925512, 14.715890808720333, 216.5624695248896], 
"eval_len": [205, 30, 34, 174, 35, 214, 37, 35, 28, 133]}

 14%|█▍        | 139999/1000000 [2:23:10<11:17:30, 21.16it/s]global step 140000, trans_decision ep_re 60.596153629660456

{"global_step": 140000, "eval_re": [28.935253338629824, 20.99842792568963, 
27.72798796403706, 22.06652035537204, 24.483920770323973, 228.52963123800762, 
13.81093653323697, 195.61981287804542, 25.385471893378323, 18.403573399883758], 
"eval_len": [40, 38, 39, 36, 37, 151, 26, 132, 37, 34]}

 15%|█▍        | 149998/1000000 [2:33:40<10:56:58, 21.56it/s]global step 150000, trans_decision ep_re 186.07170890641603

{"global_step": 150000, "eval_re": [552.1721279560063, 26.226163534719078, 
554.2069180518935, 23.153242116757013, 28.11333386331961, 23.74088077443669, 
302.7983202659686, 22.94317409076963, 11.305077519472542, 316.0578508908175], 
"eval_len": [204, 37, 356, 33, 38, 36, 167, 36, 24, 193]}

 16%|█▌        | 159999/1000000 [2:44:00<10:57:21, 21.30it/s]global step 160000, trans_decision ep_re 128.4969753298432

{"global_step": 160000, "eval_re": [13.520040292140745, 422.52484180783455, 
18.18962947772223, 22.742279370463386, 21.904762476189806, 23.255461112748907, 
24.254407088607284, 667.1064905090175, 26.87858307600942, 44.59325808769819], 
"eval_len": [24, 214, 32, 37, 36, 31, 38, 306, 39, 52]}

 17%|█▋        | 169998/1000000 [2:54:50<10:44:13, 21.47it/s]global step 170000, trans_decision ep_re 216.54167675550426

{"global_step": 170000, "eval_re": [96.65079232202406, 176.07631389254473, 
219.1484202529064, 237.58359082483096, 19.67999280893472, 20.986203371372547, 
826.2668837982142, 22.987406214460794, 348.0852682219058, 197.95189584784833], 
"eval_len": [259, 125, 217, 244, 31, 34, 403, 34, 229, 127]}

 18%|█▊        | 179999/1000000 [3:05:30<10:43:32, 21.24it/s]global step 180000, trans_decision ep_re 201.6458700404797

{"global_step": 180000, "eval_re": [24.6695107747793, 23.717866859029847, 
484.42430641750644, 587.4043976734108, 22.012455560983312, 38.7683790125693, 
24.740756939854478, 438.5346328638866, 355.31075220184243, 16.875642100934638], 
"eval_len": [38, 38, 289, 352, 38, 50, 35, 225, 165, 31]}

 19%|█▉        | 189999/1000000 [3:16:00<10:34:26, 21.28it/s]global step 190000, trans_decision ep_re 138.64342431027674

{"global_step": 190000, "eval_re": [22.30837592108473, 704.902631590648, 
12.155993785930438, 113.55983283653607, 20.97482226478638, 26.166910257209828, 
25.818046603808597, 391.02041133191926, 44.77142547774053, 24.75579303310368], 
"eval_len": [40, 353, 22, 124, 30, 38, 36, 310, 48, 38]}

 20%|█▉        | 199999/1000000 [3:26:40<10:26:41, 21.28it/s]global step 200000, trans_decision ep_re 249.18901801349358

{"global_step": 200000, "eval_re": [232.7632979563983, 151.77877130885076, 
367.5661405771327, 192.29698103990307, 15.511021940353897, 254.87053180737067, 
15.226959455402882, 924.9632560972289, 317.92870385475777, 18.98451609753677], 
"eval_len": [169, 102, 182, 145, 27, 140, 37, 384, 219, 28]}

 21%|██        | 209999/1000000 [3:37:10<10:25:22, 21.05it/s]global step 210000, trans_decision ep_re 239.15227847252373

{"global_step": 210000, "eval_re": [168.92676435127186, 212.73212874951506, 
497.0384890029136, 526.6381215865786, 125.054791739469, 46.67985835970225, 
23.2201920100393, 514.5644424918335, 22.96734191542715, 253.7006545184867], 
"eval_len": [133, 134, 323, 251, 144, 55, 33, 214, 37, 144]}

 22%|██▏       | 219998/1000000 [3:47:50<10:06:03, 21.45it/s]global step 220000, trans_decision ep_re 198.3684383837778

{"global_step": 220000, "eval_re": [258.89148001444977, 18.619818926645706, 
205.2682794999881, 18.373241029535876, 851.0809762628406, 54.37615509564818, 
137.82541695507288, 216.10760862582794, 12.70231603776172, 210.43909139000746], 
"eval_len": [144, 34, 128, 31, 408, 53, 91, 209, 24, 142]}

 23%|██▎       | 229999/1000000 [3:58:30<10:08:13, 21.10it/s]global step 230000, trans_decision ep_re 216.34614126326747

{"global_step": 230000, "eval_re": [25.89480634871704, 414.92934554342355, 
125.74647568656839, 502.2423027116324, 20.072056791609484, 411.59423330155425, 
288.49699731731823, 22.774156385218056, 26.833794172149517, 324.8772443744836], 
"eval_len": [37, 190, 121, 240, 35, 183, 174, 35, 38, 178]}

 24%|██▍       | 239999/1000000 [4:09:00<9:56:34, 21.23it/s]global step 240000, trans_decision ep_re 251.54030567882197

{"global_step": 240000, "eval_re": [125.52699085276055, 508.1925249603977, 
43.88502043255339, 249.33911288450153, 132.62679870012178, 24.4068177316485, 
373.3965515002174, 384.36042971637306, 132.64036316921386, 541.0284468404319], 
"eval_len": [98, 211, 53, 137, 80, 34, 208, 191, 96, 224]}

 25%|██▍       | 249998/1000000 [4:19:40<9:50:23, 21.17it/s]global step 250000, trans_decision ep_re 164.92148421594024

{"global_step": 250000, "eval_re": [18.607346690453042, 17.03233809207474, 
760.9035491394171, 22.940897118109472, 256.5731859289511, 15.654293190076546, 
14.46980350643136, 184.68346796996804, 335.7454785696333, 22.604481954288108], 
"eval_len": [39, 29, 326, 38, 156, 27, 27, 133, 173, 33]}

 26%|██▌       | 259999/1000000 [4:30:00<9:42:19, 21.18it/s]global step 260000, trans_decision ep_re 110.04524523046287

{"global_step": 260000, "eval_re": [157.8912665588082, 416.3818301259253, 
169.88010978250642, 25.717460493623864, 34.81330570936163, 42.75703790640447, 
23.475259990973314, 22.18453949145133, 21.61879347020532, 185.73284877536875], 
"eval_len": [114, 232, 126, 37, 50, 50, 36, 35, 34, 123]}

 27%|██▋       | 269998/1000000 [4:40:50<9:27:55, 21.42it/s]global step 270000, trans_decision ep_re 292.0910978750831

{"global_step": 270000, "eval_re": [97.34268544711244, 20.87932090826699, 
510.788381563355, 462.30102726406346, 1033.726084965537, 13.873858150407179, 
26.314928461607963, 380.79825916866594, 358.2138743799435, 16.672558441871185], 
"eval_len": [195, 34, 240, 239, 410, 28, 37, 177, 174, 27]}

 28%|██▊       | 279998/1000000 [4:51:30<9:20:42, 21.40it/s]global step 280000, trans_decision ep_re 321.9760398812756

{"global_step": 280000, "eval_re": [391.46531419689865, 27.200298694438835, 
17.76584990991332, 23.419114507482302, 516.6957965401165, 559.3769308895572, 
21.501167880536773, 549.6984317401291, 388.1831993615672, 724.4542950921165], 
"eval_len": [201, 37, 27, 34, 237, 226, 36, 229, 203, 314]}

 29%|██▉       | 289999/1000000 [5:02:10<9:18:33, 21.19it/s]global step 290000, trans_decision ep_re 356.6509993503877

{"global_step": 290000, "eval_re": [322.464924144762, 214.2792944277438, 
709.9995017041422, 1217.2331783254558, 19.06611867055818, 549.6903924664475, 
20.944791420717774, 12.75195370799829, 17.13303143349847, 482.94680720255275], 
"eval_len": [175, 125, 267, 448, 36, 252, 31, 25, 30, 205]}

 30%|██▉       | 299999/1000000 [5:12:30<9:10:34, 21.19it/s]global step 300000, trans_decision ep_re 104.69156924379487

{"global_step": 300000, "eval_re": [24.42305983679687, 21.75191963550367, 
148.93872575591925, 31.828433422804036, 447.9007811400731, 25.74382105678631, 
16.32598313465833, 282.9175910710097, 23.419344481088867, 23.666032903308686], 
"eval_len": [33, 33, 119, 50, 187, 36, 35, 142, 39, 38]}

 31%|███       | 309998/1000000 [5:23:20<8:55:36, 21.47it/s]global step 310000, trans_decision ep_re 171.39659625111136

{"global_step": 310000, "eval_re": [726.9928751544174, 238.94579099636516, 
182.2918912844173, 14.972489187061749, 185.8029104094303, 16.78199279222547, 
149.41073869339704, 138.77749856649058, 15.719673065999048, 44.27010236130936], 
"eval_len": [320, 144, 113, 26, 110, 35, 99, 99, 26, 52]}

 32%|███▏      | 319999/1000000 [5:34:00<9:01:43, 20.92it/s]global step 320000, trans_decision ep_re 210.03567949156022

{"global_step": 320000, "eval_re": [490.49068076312045, 649.3751091124723, 
156.60810358367644, 21.828328588028306, 146.7497556936641, 26.347085245522063, 
297.54825574207666, 17.74209262365639, 276.4848798675767, 17.18250369580853], 
"eval_len": [221, 273, 118, 32, 118, 38, 152, 31, 165, 28]}

 33%|███▎      | 329999/1000000 [5:44:30<8:45:29, 21.25it/s]global step 330000, trans_decision ep_re 280.89178519582595

{"global_step": 330000, "eval_re": [235.93240313995216, 180.98748970322004, 
22.14440150039993, 537.5690329749048, 27.020240247998064, 160.51916044083848, 
780.0905314921423, 25.112855861757954, 151.17830447407667, 688.3634321229687], 
"eval_len": [144, 115, 38, 303, 38, 108, 292, 38, 128, 304]}

 34%|███▍      | 339998/1000000 [5:55:10<8:37:42, 21.25it/s]global step 340000, trans_decision ep_re 199.42962499390552

{"global_step": 340000, "eval_re": [23.55447747930755, 364.1214635802942, 
520.4614563788846, 16.79419798695065, 17.117376794245992, 962.2151401691476, 
24.599078740449652, 25.33971243874137, 19.512155114389675, 20.581191256644047], 
"eval_len": [36, 196, 206, 28, 28, 492, 38, 38, 35, 32]}

 35%|███▍      | 349999/1000000 [6:05:50<8:30:40, 21.21it/s]global step 350000, trans_decision ep_re 354.4114136284451

{"global_step": 350000, "eval_re": [25.875393156013953, 18.61309388889449, 
29.03787074151256, 19.41281035487262, 778.1572631155802, 657.2260077937644, 
20.59138287163242, 26.94663193159787, 287.47182985911553, 1680.7818525714672], 
"eval_len": [37, 29, 39, 34, 333, 261, 38, 36, 151, 564]}

 36%|███▌      | 359999/1000000 [6:16:20<8:27:35, 21.01it/s]global step 360000, trans_decision ep_re 178.58783548266734

{"global_step": 360000, "eval_re": [229.8536885208101, 461.4528373579351, 
27.782707679857864, 276.7782404288354, 111.15079976903954, 22.17739776519762, 
450.08897318877644, 24.137487138481205, 25.584435851237064, 156.8717871265035], 
"eval_len": [130, 218, 38, 155, 143, 37, 168, 39, 37, 162]}

 37%|███▋      | 369998/1000000 [6:27:00<8:10:44, 21.40it/s]global step 370000, trans_decision ep_re 528.4623556057114

{"global_step": 370000, "eval_re": [967.9897056855754, 168.26445025127302, 
21.72673295781875, 523.1886326929064, 430.8003957147989, 618.1404159952533, 
473.6999888729713, 528.0769447884304, 905.5205474284681, 647.2157416696185], 
"eval_len": [294, 97, 34, 207, 200, 214, 206, 229, 309, 267]}

 38%|███▊      | 379998/1000000 [6:37:40<8:01:48, 21.45it/s]global step 380000, trans_decision ep_re 106.26553250411544

{"global_step": 380000, "eval_re": [27.68116342114165, 328.89307260965927, 
22.029212973404434, 129.13472327620173, 24.858931014375464, 23.799777338122336, 
350.24177727142705, 21.19065503513299, 112.75066524865939, 22.075346853030055], 
"eval_len": [36, 167, 36, 109, 35, 39, 168, 32, 105, 38]}

 39%|███▉      | 389999/1000000 [6:48:10<7:58:21, 21.25it/s]global step 390000, trans_decision ep_re 81.85224102414756

{"global_step": 390000, "eval_re": [25.2203958406661, 27.505720694552153, 
21.51563074126428, 23.59706284842304, 19.33304466745044, 21.909371253604856, 
607.3529113979097, 20.02580364863465, 25.890033373713628, 26.172435775256815], 
"eval_len": [34, 41, 38, 34, 30, 34, 243, 33, 38, 35]}

 40%|███▉      | 399998/1000000 [6:58:50<7:46:31, 21.43it/s]global step 400000, trans_decision ep_re 394.862279471707

{"global_step": 400000, "eval_re": [21.68243528631646, 831.4568743719, 
20.864584998364457, 874.099198435213, 361.3106207106193, 1032.3963034494298, 
739.0850200513843, 28.16732144879502, 14.702182229114836, 24.858253735933168], 
"eval_len": [34, 269, 37, 264, 162, 378, 244, 38, 30, 33]}

 41%|████      | 409999/1000000 [7:09:30<7:41:55, 21.29it/s]global step 410000, trans_decision ep_re 272.5151643573242

{"global_step": 410000, "eval_re": [562.9976012627426, 23.383413439203654, 
44.49527231147778, 17.89573843142793, 25.53026516213138, 978.0304878020723, 
978.2027846801473, 22.879499383317317, 22.293745863820916, 49.442835236900976], 
"eval_len": [228, 34, 50, 36, 37, 361, 354, 36, 36, 52]}

 42%|████▏     | 419999/1000000 [7:20:00<7:32:42, 21.35it/s]global step 420000, trans_decision ep_re 488.6525096653906

{"global_step": 420000, "eval_re": [378.75062639590556, 601.3186661725928, 
188.32161772862256, 472.4440900002276, 689.0473675952766, 1023.6022821448843, 
691.4436021898688, 24.66179724428407, 673.5817750934967, 143.35327208874696], 
"eval_len": [205, 206, 122, 186, 235, 356, 233, 35, 238, 109]}

 43%|████▎     | 429999/1000000 [7:30:40<7:32:18, 21.00it/s]global step 430000, trans_decision ep_re 288.0229880308604

{"global_step": 430000, "eval_re": [299.6950307346261, 599.569009603477, 
26.641549864746043, 813.9604309100595, 24.687677395592157, 18.948642605609063, 
149.56825772173534, 759.2161753637499, 20.27219862163324, 167.6709074873757], 
"eval_len": [191, 239, 35, 291, 37, 30, 96, 283, 37, 100]}

 44%|████▍     | 439998/1000000 [7:41:20<7:14:27, 21.48it/s]global step 440000, trans_decision ep_re 125.83698383013935

{"global_step": 440000, "eval_re": [16.450413431792857, 21.40632917238546, 
430.6273473120681, 27.03751618254273, 18.224075994733123, 22.75185779953101, 
19.65299145775794, 44.35803716056054, 158.65184643267872, 499.20942335734287], 
"eval_len": [34, 33, 179, 37, 28, 32, 30, 52, 99, 189]}

 45%|████▍     | 449999/1000000 [7:52:00<7:18:44, 20.89it/s]global step 450000, trans_decision ep_re 293.2622058086788

{"global_step": 450000, "eval_re": [588.7003507052685, 608.2423411228223, 
27.868939568553394, 23.71871175740471, 16.76428027866926, 24.432650911194806, 
338.5878651624917, 24.923142586261584, 812.6373805352212, 466.74639545890085], 
"eval_len": [223, 217, 39, 35, 36, 36, 187, 36, 290, 181]}

 46%|████▌     | 459999/1000000 [8:02:30<7:06:08, 21.12it/s]global step 460000, trans_decision ep_re 85.35358868414922

{"global_step": 460000, "eval_re": [21.294237709282534, 148.31702912415966, 
109.68091254824364, 160.5134643581999, 25.064067563672747, 103.54301608159403, 
128.3658600002193, 18.894996158501012, 20.22080143188069, 117.64150186573863], 
"eval_len": [37, 99, 96, 105, 36, 89, 102, 31, 36, 104]}

 47%|████▋     | 469998/1000000 [8:13:10<6:55:58, 21.24it/s]global step 470000, trans_decision ep_re 179.219922535413

{"global_step": 470000, "eval_re": [379.094126760134, 997.4791047006215, 
33.66841975316159, 22.434120659216905, 26.830558943634852, 23.995734704195662, 
205.89156332610233, 15.177103363343724, 43.604522183268664, 44.023970960450825],
"eval_len": [211, 334, 48, 33, 36, 35, 133, 31, 49, 51]}

 48%|████▊     | 479999/1000000 [8:23:40<6:48:29, 21.22it/s]global step 480000, trans_decision ep_re 181.83001130672218

{"global_step": 480000, "eval_re": [17.624514637105495, 17.41813670130648, 
22.179667933520935, 45.92650790319622, 24.92519179675457, 125.29438144113503, 
18.250378134945706, 461.31086493421896, 27.12011066527038, 1058.2503589197681], 
"eval_len": [30, 28, 33, 52, 38, 108, 29, 187, 37, 473]}

 49%|████▉     | 489998/1000000 [8:34:20<6:37:42, 21.37it/s]global step 490000, trans_decision ep_re 180.8956695638901

{"global_step": 490000, "eval_re": [444.07998476649004, 14.11846377090182, 
14.518582000791417, 24.720263099347285, 365.62896968520784, 20.54295051607858, 
526.5481146895345, 154.41487345104684, 229.61078116714185, 14.773712492361085], 
"eval_len": [202, 24, 27, 36, 169, 31, 217, 104, 145, 25]}

 50%|████▉     | 499999/1000000 [8:45:00<6:33:22, 21.18it/s]global step 500000, trans_decision ep_re 371.29651178927475

{"global_step": 500000, "eval_re": [923.4249749460027, 22.631459325245842, 
681.9111836139459, 22.320588214150327, 24.152853327557075, 452.9936977206132, 
359.08126139972126, 433.245044474709, 368.9967068473227, 424.2073480234797], 
"eval_len": [339, 37, 236, 35, 39, 180, 186, 180, 185, 193]}

 51%|█████     | 509999/1000000 [8:55:40<6:25:48, 21.17it/s]global step 510000, trans_decision ep_re 204.9416568680756

{"global_step": 510000, "eval_re": [20.89636421004078, 23.72733658155407, 
26.55587253302764, 17.538285032250652, 659.0913908707714, 146.88941329961128, 
22.95352255770093, 623.2391814786845, 28.43026160698611, 480.09494051012854], 
"eval_len": [39, 38, 37, 30, 291, 96, 38, 239, 36, 222]}

 52%|█████▏    | 519999/1000000 [9:06:10<6:16:54, 21.23it/s]global step 520000, trans_decision ep_re 154.05163990664153

{"global_step": 520000, "eval_re": [270.9816387561684, 17.46883622537062, 
25.28429087332872, 275.09085847439303, 19.42922964028626, 21.668500004625425, 
553.7054644776274, 314.75628038624257, 19.555328967216287, 22.575971261156397], 
"eval_len": [176, 31, 37, 129, 30, 36, 220, 177, 38, 33]}

 53%|█████▎    | 529998/1000000 [9:16:50<6:06:19, 21.38it/s]global step 530000, trans_decision ep_re 356.4915121077141

{"global_step": 530000, "eval_re": [738.3976792458733, 25.26783612750866, 
477.5921045910501, 479.8866946579608, 21.43652632943862, 730.5011474996815, 
35.44006764732004, 1007.7111035666968, 24.95626051237686, 23.725700899234017], 
"eval_len": [242, 36, 184, 191, 34, 267, 49, 326, 36, 35]}

 54%|█████▍    | 539999/1000000 [9:27:30<6:05:55, 20.95it/s]global step 540000, trans_decision ep_re 255.59413258529884

{"global_step": 540000, "eval_re": [533.6217010986404, 533.681176986044, 
35.38152930994328, 28.419215246901267, 726.4326413506063, 24.024891847772132, 
26.953307387304942, 15.375277255661256, 99.49953709116839, 532.5520482789464], 
"eval_len": [212, 184, 50, 38, 243, 32, 37, 34, 111, 207]}

 55%|█████▍    | 549999/1000000 [9:37:50<5:54:07, 21.18it/s]global step 550000, trans_decision ep_re 22.109127038720274

{"global_step": 550000, "eval_re": [20.775317387848762, 23.566626899138768, 
26.350337803993444, 19.465969584831548, 25.784456849765053, 20.924193723504096, 
20.173479054234114, 21.973335676838293, 20.588475054452424, 21.489078352596234],
"eval_len": [31, 35, 39, 33, 38, 40, 33, 33, 33, 35]}

 56%|█████▌    | 559998/1000000 [9:48:40<5:46:33, 21.16it/s]global step 560000, trans_decision ep_re 122.96467657486753

{"global_step": 560000, "eval_re": [24.57603920298543, 30.741727327095354, 
187.5984593743933, 125.46918844546954, 19.229454358900842, 21.997912665372706, 
156.0327639257219, 21.853166327531518, 617.8572431152537, 24.290811005950914], 
"eval_len": [38, 47, 128, 132, 30, 35, 121, 35, 207, 38]}

 57%|█████▋    | 569999/1000000 [9:59:20<5:39:01, 21.14it/s]global step 570000, trans_decision ep_re 185.1287995356409

{"global_step": 570000, "eval_re": [23.725779105291622, 757.1072148014841, 
534.7978559506067, 21.228495151106493, 23.860638172586928, 139.6401496347457, 
25.829837171282367, 23.31220340417075, 282.8166861026107, 18.96913586252323], 
"eval_len": [33, 257, 227, 38, 38, 97, 39, 35, 131, 30]}

 58%|█████▊    | 579999/1000000 [10:09:50<5:33:33, 20.99it/s]global step 580000, trans_decision ep_re 160.26369793250927

{"global_step": 580000, "eval_re": [42.86792589778406, 19.82892562652445, 
17.243661037713874, 18.905013742829105, 132.69296839749984, 155.0042501096931, 
699.0003583988167, 46.937861557901776, 36.0222347684434, 434.1337797878863], 
"eval_len": [50, 34, 30, 32, 124, 115, 275, 51, 50, 188]}

 59%|█████▉    | 589998/1000000 [10:20:30<5:20:08, 21.35it/s]global step 590000, trans_decision ep_re 182.28813059058223

{"global_step": 590000, "eval_re": [427.61796078991057, 133.22884827896485, 
89.08675120187031, 99.21795917486232, 26.218699927003552, 26.389355559271245, 
405.9819876028307, 20.530709677980482, 19.193588805805224, 575.415444887323], 
"eval_len": [171, 99, 94, 83, 36, 37, 173, 34, 30, 235]}

 60%|█████▉    | 599999/1000000 [10:31:10<5:14:42, 21.18it/s]global step 600000, trans_decision ep_re 97.8889401410062

{"global_step": 600000, "eval_re": [24.387050627676338, 24.70755125722155, 
18.014995346949892, 28.766317696355735, 162.09484885209073, 13.864551385202606, 
235.27346955005842, 250.12150805778361, 21.699307368666872, 199.95980126805625],
"eval_len": [37, 35, 29, 37, 102, 34, 122, 133, 35, 119]}

 61%|██████    | 609999/1000000 [10:41:40<5:06:58, 21.17it/s]global step 610000, trans_decision ep_re 276.46739109182136

{"global_step": 610000, "eval_re": [22.89773010432928, 706.2289629748658, 
592.2501767118783, 23.36376349969003, 441.2439235907397, 22.673640008476937, 
22.68861887793113, 242.03451214825367, 178.97058643148856, 512.3219965705599], 
"eval_len": [34, 224, 220, 39, 199, 38, 34, 147, 127, 200]}

 62%|██████▏   | 619998/1000000 [10:52:20<4:56:02, 21.39it/s]global step 620000, trans_decision ep_re 248.9645771550268

{"global_step": 620000, "eval_re": [186.91535490777363, 17.948465701260282, 
878.7732282679506, 554.2413917057347, 20.045189985756924, 21.80949175819113, 
763.1924285835653, 16.134455318488257, 20.589611491361357, 9.99615383018613], 
"eval_len": [105, 30, 265, 227, 34, 40, 271, 37, 37, 20]}

 63%|██████▎   | 629999/1000000 [11:03:00<4:51:05, 21.18it/s]global step 630000, trans_decision ep_re 148.72973836088698

{"global_step": 630000, "eval_re": [22.684168595431544, 18.045226745692073, 
24.56666011252647, 27.058638226018477, 22.33797516600187, 26.76118629313847, 
23.402845436935788, 230.99413920360035, 482.0939280539099, 609.3526157756148], 
"eval_len": [34, 28, 38, 38, 35, 38, 33, 146, 191, 223]}

 64%|██████▍   | 639999/1000000 [11:13:30<4:42:31, 21.24it/s]global step 640000, trans_decision ep_re 191.7622774393927

{"global_step": 640000, "eval_re": [17.891637819402867, 18.635143961129977, 
26.09811069288429, 466.7239985618717, 31.098633970226256, 20.87530846551926, 
26.280220349433442, 477.2034714089881, 20.06591778903228, 812.7503313754389], 
"eval_len": [32, 35, 38, 176, 43, 37, 38, 221, 35, 269]}

 65%|██████▍   | 649998/1000000 [11:24:10<4:32:27, 21.41it/s]global step 650000, trans_decision ep_re 92.36247827441385

{"global_step": 650000, "eval_re": [24.705800905469612, 27.565472805553792, 
27.662035801850983, 305.57612641753957, 25.46307356894021, 23.770798134511086, 
23.055721101277367, 420.3753679111316, 21.497538058798725, 23.95284803906545], 
"eval_len": [37, 38, 38, 202, 39, 33, 35, 241, 33, 34]}

 66%|██████▌   | 659999/1000000 [11:34:50<4:27:37, 21.17it/s]global step 660000, trans_decision ep_re 195.559386873088

{"global_step": 660000, "eval_re": [176.17776430965654, 22.435463040557305, 
180.38800680889005, 24.20562820355755, 18.04196260075665, 155.979503126884, 
807.0136875815862, 269.2768225851591, 276.377255916701, 25.697774557131602], 
"eval_len": [101, 33, 99, 35, 31, 94, 310, 140, 124, 36]}

 67%|██████▋   | 669999/1000000 [11:45:20<4:21:44, 21.01it/s]global step 670000, trans_decision ep_re 280.7027092665025

{"global_step": 670000, "eval_re": [17.893359680584926, 21.189070749595814, 
739.1306626178657, 30.462314775081445, 329.0296768908873, 132.90030984324997, 
13.710152426854528, 104.5819058040335, 98.41351653760006, 1319.7161233392717], 
"eval_len": [31, 36, 267, 48, 134, 96, 29, 88, 82, 420]}

 68%|██████▊   | 679998/1000000 [11:56:00<4:08:24, 21.47it/s]global step 680000, trans_decision ep_re 134.72379460835242

{"global_step": 680000, "eval_re": [20.201141808609734, 21.241285433163394, 
267.99265698152334, 316.49522053065414, 96.72018261293873, 44.81523384665388, 
38.80118621232512, 46.58159507952219, 461.22002126702785, 33.16942231110572], 
"eval_len": [34, 35, 161, 173, 94, 51, 48, 51, 176, 48]}

 69%|██████▉   | 689999/1000000 [12:06:30<4:05:29, 21.05it/s]global step 690000, trans_decision ep_re 193.63708900034908

{"global_step": 690000, "eval_re": [233.40293912193172, 111.45702520469789, 
334.6584078959566, 25.203741324787767, 307.0117909819495, 134.99857830478476, 
25.61041146554802, 19.784049219608455, 397.9277426192863, 346.3162038649399], 
"eval_len": [112, 94, 158, 38, 148, 100, 38, 34, 206, 158]}

 70%|██████▉   | 699999/1000000 [12:17:10<3:54:35, 21.31it/s]global step 700000, trans_decision ep_re 175.91569837605567

{"global_step": 700000, "eval_re": [320.8625296555837, 167.83960223628694, 
15.700286649139901, 22.518445832269354, 208.18926951370366, 21.172518818210825, 
244.12987687348718, 23.83372070844776, 372.3786995677609, 362.532033905666], 
"eval_len": [225, 103, 34, 37, 143, 36, 138, 36, 246, 211]}

 71%|███████   | 709998/1000000 [12:27:40<3:46:42, 21.32it/s]global step 710000, trans_decision ep_re 267.26359371897763

{"global_step": 710000, "eval_re": [377.83740555163996, 187.23149515367038, 
41.92967024203988, 433.28469368211995, 88.04175594572581, 379.01116344075206, 
101.01685653453617, 727.0557815104066, 10.379932517390547, 326.8471826114944], 
"eval_len": [185, 160, 50, 260, 96, 156, 89, 230, 21, 175]}

 72%|███████▏  | 719999/1000000 [12:38:20<3:40:31, 21.16it/s]global step 720000, trans_decision ep_re 193.0389162661882

{"global_step": 720000, "eval_re": [18.47741868268888, 21.40346855767405, 
159.9164074378245, 22.614634927743555, 399.5960364107832, 270.09747046294666, 
17.892770485366896, 61.828780713993154, 273.92643772408076, 684.6357372587804], 
"eval_len": [37, 37, 99, 32, 159, 166, 32, 85, 146, 302]}

 73%|███████▎  | 729998/1000000 [12:48:50<3:30:05, 21.42it/s]global step 730000, trans_decision ep_re 202.79246125066214

{"global_step": 730000, "eval_re": [24.678251713869482, 217.62519660551484, 
255.21527753587347, 21.67887015607064, 305.6963461232789, 506.4881489900621, 
257.2425131760091, 22.85907955658701, 390.32465130406973, 26.116277345286104], 
"eval_len": [34, 107, 113, 37, 145, 204, 136, 37, 170, 36]}

 74%|███████▍  | 739997/1000000 [12:59:20<3:22:56, 21.35it/s]global step 740000, trans_decision ep_re 260.9588870866666

{"global_step": 740000, "eval_re": [90.11483622946156, 21.47385104625054, 
866.8101163631859, 547.6033402526572, 19.91228979649871, 17.504926372342027, 
987.3521326663272, 25.922578962602827, 18.41803168427871, 14.476767493061784], 
"eval_len": [88, 37, 262, 189, 36, 36, 313, 38, 37, 27]}

 75%|███████▍  | 749999/1000000 [13:09:50<3:15:49, 21.28it/s]global step 750000, trans_decision ep_re 113.74320341392338

{"global_step": 750000, "eval_re": [23.65920636350351, 88.37554803787212, 
40.56423742187859, 21.323451900805818, 35.25829466741174, 603.6942063581934, 
19.034605269088782, 80.60989339468676, 16.1101877795908, 208.80240294620228], 
"eval_len": [33, 90, 50, 32, 48, 239, 30, 84, 27, 129]}

 76%|███████▌  | 759999/1000000 [13:20:20<3:07:15, 21.36it/s]global step 760000, trans_decision ep_re 85.0044094144996

{"global_step": 760000, "eval_re": [162.88157682751262, 120.69882451791243, 
24.582979809065233, 108.68671719633653, 122.06361037130101, 121.60295878935837, 
19.676024119611668, 28.072324061951345, 18.64134296005175, 123.13773549189516], 
"eval_len": [137, 94, 36, 86, 96, 91, 35, 38, 38, 93]}

 77%|███████▋  | 769999/1000000 [13:30:42<3:00:37, 21.22it/s]global step 770000, trans_decision ep_re 53.463369688590625

{"global_step": 770000, "eval_re": [313.44675092306255, 24.664149154850676, 
24.672949065936855, 21.66369333521117, 23.794319778111053, 22.454151307767326, 
21.058777518354585, 18.47418403929304, 48.110564561604285, 16.294157201714658], 
"eval_len": [141, 39, 37, 38, 36, 35, 36, 33, 52, 33]}

 78%|███████▊  | 779998/1000000 [13:41:30<2:49:50, 21.59it/s]global step 780000, trans_decision ep_re 136.05084843835016

{"global_step": 780000, "eval_re": [89.91730133556447, 22.016340682141625, 
21.700505366846517, 17.92830399069013, 26.607771055121976, 15.491239788544643, 
19.234634787591236, 20.82905910635553, 308.85485363624815, 817.9284746343973], 
"eval_len": [85, 36, 32, 31, 38, 36, 29, 34, 153, 247]}

 79%|███████▉  | 789998/1000000 [13:52:00<2:42:25, 21.55it/s]global step 790000, trans_decision ep_re 110.47570472617956

{"global_step": 790000, "eval_re": [19.668378545294473, 24.66210600541011, 
47.73958732169221, 118.50746913094639, 457.6462243502367, 331.78042814650377, 
33.98463340760441, 21.26416843813482, 23.426737392635772, 26.07731452333698], 
"eval_len": [38, 35, 52, 89, 219, 143, 47, 35, 38, 38]}

 80%|███████▉  | 799999/1000000 [14:02:30<2:36:02, 21.36it/s]global step 800000, trans_decision ep_re 168.33859441301598

{"global_step": 800000, "eval_re": [92.59805088964488, 32.414997346201105, 
25.855355971559092, 170.6536641390371, 545.6722921446774, 18.327407158742563, 
243.71534393957148, 265.0807482037606, 270.5490605381769, 18.519023798788798], 
"eval_len": [84, 47, 37, 119, 259, 30, 135, 148, 141, 31]}

 81%|████████  | 809999/1000000 [14:13:11<2:28:06, 21.38it/s]global step 810000, trans_decision ep_re 306.17862385905613

{"global_step": 810000, "eval_re": [25.45764372368593, 753.9154036349014, 
263.2090181985564, 21.432688236300862, 604.4072402614413, 23.0609969282678, 
907.9872723551225, 24.47602480990353, 218.7451790162961, 219.0947714260862], 
"eval_len": [36, 292, 196, 35, 230, 38, 286, 36, 139, 180]}

 82%|████████▏ | 819998/1000000 [14:23:41<2:18:45, 21.62it/s]global step 820000, trans_decision ep_re 106.45782684008898

{"global_step": 820000, "eval_re": [27.23010820357338, 158.28844033014278, 
286.1106312865353, 16.44185983373356, 22.061603675181498, 205.4237803993124, 
122.29397557326033, 15.032824535131018, 107.090014843603, 104.60502972041654], 
"eval_len": [37, 93, 139, 30, 39, 130, 92, 27, 85, 84]}

 83%|████████▎ | 829999/1000000 [14:34:11<2:12:40, 21.36it/s]global step 830000, trans_decision ep_re 135.71275332724124

{"global_step": 830000, "eval_re": [514.6901861476805, 20.412431406947945, 
298.0749349651926, 16.165372516735445, 178.29187968038525, 18.193765670310636, 
151.14985097095172, 30.409744084533216, 103.57101652227453, 26.168351307400652],
"eval_len": [202, 35, 177, 25, 108, 32, 90, 40, 85, 39]}

 84%|████████▍ | 839999/1000000 [14:44:41<2:04:37, 21.40it/s]global step 840000, trans_decision ep_re 187.42948840257154

{"global_step": 840000, "eval_re": [51.20185105626399, 406.36387281025196, 
24.60675606282261, 450.8323175359141, 130.4471843939682, 244.30710341716073, 
43.44082778307967, 171.72071987242404, 37.57566944797489, 313.7985816458553], 
"eval_len": [52, 180, 35, 169, 94, 162, 50, 179, 49, 195]}

 85%|████████▍ | 849999/1000000 [14:55:21<1:56:33, 21.45it/s]global step 850000, trans_decision ep_re 286.64669974858174

{"global_step": 850000, "eval_re": [26.302595099037863, 154.8201896061686, 
570.0392008661606, 1194.0808944764053, 95.86288276554546, 390.619402880593, 
236.60912667464464, 82.15699126401876, 22.282507681611236, 93.693206171632], 
"eval_len": [37, 104, 219, 367, 85, 173, 173, 83, 36, 83]}

 86%|████████▌ | 859998/1000000 [15:05:51<1:47:58, 21.61it/s]global step 860000, trans_decision ep_re 343.34674683619744

{"global_step": 860000, "eval_re": [38.81932277650134, 385.1205017444454, 
26.197352328576155, 155.60387523265183, 23.73333799433903, 138.03459996561384, 
204.57493378780075, 21.725084749117297, 1278.4316294427063, 1161.2268303402225],
"eval_len": [50, 164, 37, 103, 35, 96, 133, 35, 505, 611]}

 87%|████████▋ | 869999/1000000 [15:16:11<1:41:21, 21.38it/s]global step 870000, trans_decision ep_re 135.8049327772801

{"global_step": 870000, "eval_re": [176.3279995000499, 21.011951261600696, 
16.45937064131383, 661.7450781294554, 23.660232613197998, 23.516625289532254, 
26.034325694977184, 123.43329149875493, 144.46667447256578, 141.39377867135323],
"eval_len": [103, 33, 31, 227, 34, 32, 38, 91, 96, 95]}

 88%|████████▊ | 879999/1000000 [15:27:01<1:33:48, 21.32it/s]global step 880000, trans_decision ep_re 253.40759919499925

{"global_step": 880000, "eval_re": [216.21860168919238, 18.22686037802883, 
417.72242003035205, 445.3531523747051, 31.52241586594967, 190.02400071888283, 
24.619844096771274, 620.1930852256036, 17.266189229200098, 552.9294223413065], 
"eval_len": [118, 30, 207, 232, 48, 101, 35, 194, 29, 198]}

 89%|████████▉ | 889998/1000000 [15:37:31<1:25:11, 21.52it/s]global step 890000, trans_decision ep_re 127.238278218479

{"global_step": 890000, "eval_re": [264.11046066230915, 18.034245342065557, 
202.2574068868098, 359.8229828887974, 165.15264157526371, 31.864385843614194, 
25.141469010443842, 163.50804460226837, 21.692876147931496, 20.798269225286475],
"eval_len": [124, 35, 109, 160, 97, 47, 35, 100, 37, 33]}

 90%|████████▉ | 899998/1000000 [15:48:01<1:17:27, 21.52it/s]global step 900000, trans_decision ep_re 78.60248703993547

{"global_step": 900000, "eval_re": [17.814958962430154, 21.254768671258862, 
38.69238923824519, 17.46274914568074, 228.0021067718565, 205.6649902185101, 
44.407621103800345, 97.89022682262976, 94.74502253197552, 20.090036932967582], 
"eval_len": [32, 35, 48, 29, 143, 144, 49, 84, 90, 34]}

 91%|█████████ | 909997/1000000 [15:58:31<1:10:39, 21.23it/s]global step 910000, trans_decision ep_re 274.1854778698203

{"global_step": 910000, "eval_re": [19.53655804155744, 96.59130566765346, 
196.39615338038504, 474.32949389964745, 482.7989917762366, 337.81837376764497, 
822.812727674311, 262.7872299872474, 21.213145877967392, 27.57079862555241], 
"eval_len": [34, 86, 116, 201, 175, 160, 317, 165, 34, 47]}

 92%|█████████▏| 919999/1000000 [16:09:01<1:02:14, 21.42it/s]global step 920000, trans_decision ep_re 40.858634708669605

{"global_step": 920000, "eval_re": [26.453929376399863, 19.39007156379007, 
21.897352350223056, 13.984785977970047, 20.75216592019227, 18.39787389974755, 
22.776080937015443, 209.75299277724065, 22.428375866488338, 32.75271841762872], 
"eval_len": [36, 35, 31, 34, 31, 32, 35, 103, 36, 48]}

 93%|█████████▎| 929999/1000000 [16:19:31<54:33, 21.38it/s]global step 930000, trans_decision ep_re 136.50491858519675

{"global_step": 930000, "eval_re": [272.5288196893239, 268.8320086958971, 
22.319972922650077, 142.68560133873106, 18.98992403582772, 96.03657430805625, 
92.47248514557458, 196.74380760035584, 93.52031747818216, 160.91967463736847], 
"eval_len": [173, 118, 33, 88, 31, 85, 85, 101, 86, 92]}

 94%|█████████▍| 939999/1000000 [16:30:11<47:24, 21.10it/s]global step 940000, trans_decision ep_re 180.47331606927975

{"global_step": 940000, "eval_re": [34.46697986933876, 21.551454449394278, 
20.75759977097022, 231.35819594903165, 45.71247039661418, 35.2511401568475, 
428.84847413088966, 18.055409006945045, 385.6688218659629, 583.062615096803], 
"eval_len": [49, 34, 31, 129, 50, 50, 163, 33, 175, 209]}

 95%|█████████▍| 949999/1000000 [16:40:41<39:03, 21.33it/s]global step 950000, trans_decision ep_re 265.6227377628822

{"global_step": 950000, "eval_re": [408.1537365265882, 201.3398783406738, 
22.387654474974127, 374.1693691803314, 365.674167301084, 364.33551565211735, 
20.69706286822725, 7.455153639240922, 246.57208854908095, 645.4427510965039], 
"eval_len": [209, 111, 38, 182, 152, 227, 33, 19, 120, 230]}

 96%|█████████▌| 959999/1000000 [16:51:21<31:25, 21.22it/s]global step 960000, trans_decision ep_re 212.9633003696721

{"global_step": 960000, "eval_re": [204.22056742559622, 176.74667317505566, 
151.04465905203492, 506.8581244230381, 15.215182925021395, 49.296188271926766, 
443.43632368340195, 251.69838784998328, 35.03057115438305, 296.08632573627983], 
"eval_len": [116, 209, 106, 196, 27, 50, 180, 131, 48, 127]}

 97%|█████████▋| 969997/1000000 [17:01:51<23:27, 21.31it/s]global step 970000, trans_decision ep_re 254.55314646835274

{"global_step": 970000, "eval_re": [494.55970552001617, 34.62283942718381, 
19.769320065639164, 25.21655211144395, 1240.8447468740994, 573.3671781984824, 
44.043073235442854, 20.133962588487417, 48.25044727518146, 44.72363938755072], 
"eval_len": [210, 49, 36, 39, 475, 227, 52, 32, 53, 53]}

 98%|█████████▊| 979999/1000000 [17:12:31<15:48, 21.08it/s]global step 980000, trans_decision ep_re 248.90699954811126

{"global_step": 980000, "eval_re": [53.00272470903579, 354.63210313838186, 
277.84718903025777, 573.1438074962493, 214.1768571080007, 106.02252271797559, 
459.4762815918552, 17.435040159287553, 41.531002876664864, 391.8024666534036], 
"eval_len": [52, 277, 147, 260, 162, 94, 181, 29, 52, 162]}

 99%|█████████▉| 989998/1000000 [17:23:01<07:42, 21.63it/s]global step 990000, trans_decision ep_re 230.4059280026188

{"global_step": 990000, "eval_re": [602.3049119102642, 31.62205012887095, 
42.665644427531014, 779.100261107115, 399.0777444980987, 223.2046794218849, 
14.977310656738425, 167.46276707293512, 24.510797660929445, 19.133113141820477],
"eval_len": [234, 49, 50, 272, 185, 152, 32, 148, 36, 31]}

100%|█████████▉| 999999/1000000 [17:33:31<00:00, 21.04it/s]global step 1000000, trans_decision ep_re 245.1898795150868

{"global_step": 1000000, "eval_re": [807.5632168131958, 195.2538011237282, 
16.921656685278037, 35.26643163469325, 896.9240734801615, 184.48008347003088, 
30.02880447933732, 230.5902875391603, 35.22982836092282, 19.640611564359908], 
"eval_len": [335, 127, 29, 49, 285, 133, 48, 133, 49, 33]}

100%|██████████| 1000000/1000000 [17:33:39<00:00, 15.82it/s]
