
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.2
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [05:00<12:22:24, 22.22it/s]global step 10000, trans_decision ep_re 156.23652264506816

{"global_step": 10000, "eval_re": [127.11836259494918, 90.1605430863151, 
293.0151613229442, 116.7323285192936, 89.51354958974963, 311.9135664922681, 
89.31252787082605, 146.36536003483994, 107.36138181190952, 190.8724451275861], 
"eval_len": [25, 18, 54, 23, 18, 61, 18, 28, 21, 36]}

  2%|▏         | 19998/1000000 [15:11<12:02:58, 22.59it/s]global step 20000, trans_decision ep_re 136.27509254178287

{"global_step": 20000, "eval_re": [122.84837965266547, 317.1555309350772, 
117.09773782330124, 119.94819898312235, 108.16956483389092, 90.73518221837351, 
133.55566950951572, 110.75070540462443, 107.7335418662696, 134.75641419098824], 
"eval_len": [24, 58, 23, 23, 21, 18, 26, 22, 21, 27]}

  3%|▎         | 29999/1000000 [25:30<12:03:41, 22.34it/s]global step 30000, trans_decision ep_re 111.17862724440147

{"global_step": 30000, "eval_re": [106.24382668429651, 108.38618298611, 
101.50839387906561, 121.64218515636156, 118.52066986623244, 114.3394827426537, 
106.31695386570362, 102.40897727425184, 123.38140573306896, 109.03819425627054],
"eval_len": [21, 21, 20, 24, 23, 22, 21, 20, 24, 21]}

  4%|▍         | 39998/1000000 [35:40<11:51:16, 22.49it/s]global step 40000, trans_decision ep_re 154.1469911459521

{"global_step": 40000, "eval_re": [155.00065601105635, 120.0401333737352, 
375.6491942721506, 103.25953531752477, 143.13120031937638, 166.04977640500846, 
113.16998367361354, 108.61011327974245, 126.11402183473041, 130.44529697258284],
"eval_len": [30, 23, 73, 20, 27, 33, 22, 21, 24, 25]}

  5%|▍         | 49997/1000000 [45:31<11:55:31, 22.13it/s]global step 50000, trans_decision ep_re 174.45523953184883

{"global_step": 50000, "eval_re": [109.67993983028228, 118.14777635669694, 
129.08492085288626, 101.7392573145445, 102.86790065152233, 443.2459151886469, 
316.8430810151165, 151.5514357533024, 112.85616785575392, 158.5360004997362], 
"eval_len": [22, 23, 25, 20, 20, 80, 57, 29, 22, 31]}

  6%|▌         | 59999/1000000 [55:50<11:42:00, 22.32it/s]global step 60000, trans_decision ep_re 133.18190031050813

{"global_step": 60000, "eval_re": [95.50437281765383, 117.94912385709435, 
259.5714974910389, 130.63164693340303, 95.43816974882505, 122.45256572488086, 
113.02499936695394, 154.23011744567324, 147.02740674777792, 95.98910297178043], 
"eval_len": [19, 23, 52, 26, 19, 24, 22, 30, 28, 19]}

  7%|▋         | 69999/1000000 [1:06:00<11:33:50, 22.34it/s]global step 70000, trans_decision ep_re 182.35837540416853

{"global_step": 70000, "eval_re": [139.47469444162678, 376.9013151945847, 
310.03498143843166, 107.29023222540536, 112.59758566645871, 96.59756063480239, 
132.70246242920018, 319.6022934822371, 96.88028514313444, 131.50234338580415], 
"eval_len": [27, 73, 60, 21, 22, 19, 26, 58, 19, 25]}

  8%|▊         | 79997/1000000 [1:16:10<11:27:42, 22.30it/s]global step 80000, trans_decision ep_re 132.00819678794

{"global_step": 80000, "eval_re": [95.73082140140893, 105.62826580359634, 
347.95858876229704, 89.51051108342196, 94.9152115698251, 95.39424422773136, 
111.21905573894178, 146.5456178399101, 124.79566080182757, 108.38399065043978], 
"eval_len": [19, 21, 64, 18, 19, 19, 22, 28, 24, 21]}

  9%|▉         | 89999/1000000 [1:26:20<11:25:39, 22.12it/s]global step 90000, trans_decision ep_re 166.21664044493298

{"global_step": 90000, "eval_re": [149.32690508285356, 103.21927803925729, 
107.70291531025845, 168.69032939162247, 107.0678772965282, 566.8425385785844, 
95.80100726157342, 108.29893535415871, 118.22966756808533, 136.98695056640807], 
"eval_len": [29, 20, 21, 33, 21, 108, 19, 21, 23, 27]}

 10%|▉         | 99999/1000000 [1:36:30<11:10:49, 22.36it/s]global step 100000, trans_decision ep_re 168.3905270604103

{"global_step": 100000, "eval_re": [131.5519487842394, 117.57197300338855, 
377.60742612742786, 89.16869619416195, 129.9192641577253, 120.11995086220776, 
167.51289574191455, 102.70427379687126, 163.12635934165837, 284.62248259450774],
"eval_len": [26, 23, 69, 18, 25, 23, 32, 20, 31, 55]}

 11%|█         | 109999/1000000 [1:46:40<11:09:43, 22.15it/s]global step 110000, trans_decision ep_re 142.10020264122954

{"global_step": 110000, "eval_re": [208.40844987317246, 113.63348029613864, 
329.67595096662933, 130.57542877793352, 89.23120227761012, 84.22883340735194, 
112.46401467854236, 148.06188214001, 114.42901748464111, 90.29376651026593], 
"eval_len": [41, 22, 63, 25, 18, 17, 22, 29, 22, 18]}

 12%|█▏        | 119999/1000000 [1:56:50<11:04:18, 22.08it/s]global step 120000, trans_decision ep_re 179.35254606213599

{"global_step": 120000, "eval_re": [95.73743124613907, 384.8199761129658, 
113.92245509027025, 133.58684650253525, 135.42050808510672, 197.82772558011956, 
297.42782680236434, 151.98001505859918, 166.0962295605527, 116.70644658270689], 
"eval_len": [19, 73, 22, 26, 26, 37, 55, 29, 32, 23]}

 13%|█▎        | 129998/1000000 [2:06:52<10:44:07, 22.51it/s]global step 130000, trans_decision ep_re 246.7731653912568

{"global_step": 130000, "eval_re": [95.42578211921429, 521.1628196009942, 
96.32072742595443, 269.77777905509123, 116.92837188060172, 453.1991638568413, 
140.65470997035578, 102.23725124011577, 503.3148567443795, 168.71019201901981], 
"eval_len": [19, 101, 19, 55, 23, 88, 27, 20, 96, 33]}

 14%|█▍        | 139997/1000000 [2:17:20<10:47:03, 22.15it/s]global step 140000, trans_decision ep_re 124.81389515051413

{"global_step": 140000, "eval_re": [145.5929353353994, 109.84570426844934, 
103.00943724917201, 107.36710410334152, 143.648159728722, 129.08100529360263, 
180.24551273421815, 108.3034927423215, 119.82708273658767, 101.21851731332681], 
"eval_len": [28, 21, 20, 21, 28, 26, 34, 21, 23, 20]}

 15%|█▍        | 149999/1000000 [2:27:30<10:38:00, 22.20it/s]global step 150000, trans_decision ep_re 169.15810562764273

{"global_step": 150000, "eval_re": [123.83125022761139, 156.72011522215317, 
324.96547164402375, 130.90876084808386, 136.9372615435793, 113.80551646517222, 
381.34295217963756, 90.12190169864212, 124.89377411129672, 108.05405233622692], 
"eval_len": [24, 31, 62, 25, 27, 22, 73, 18, 25, 21]}

 16%|█▌        | 159998/1000000 [2:37:31<10:25:15, 22.39it/s]global step 160000, trans_decision ep_re 203.70337073016054

{"global_step": 160000, "eval_re": [354.6740913348706, 455.3119449109858, 
258.04748705355246, 125.32421688287889, 106.7773361063961, 118.46499690536459, 
259.5116119418895, 122.07323737854865, 141.33825088314677, 95.51053390397223], 
"eval_len": [65, 82, 50, 24, 21, 23, 53, 24, 28, 19]}

 17%|█▋        | 169999/1000000 [2:48:00<10:24:20, 22.16it/s]global step 170000, trans_decision ep_re 251.28412718844623

{"global_step": 170000, "eval_re": [103.6106195573649, 592.1289036930698, 
490.883399689758, 163.12678889638065, 90.5496840158472, 155.77332579332673, 
279.16452935201715, 146.9728726252082, 376.6623853900916, 113.96876287139881], 
"eval_len": [20, 111, 89, 31, 18, 30, 53, 28, 80, 22]}

 18%|█▊        | 179999/1000000 [2:58:10<10:12:22, 22.32it/s]global step 180000, trans_decision ep_re 152.30494913246568

{"global_step": 180000, "eval_re": [133.57147215739508, 161.28523316606746, 
141.08163545061592, 111.45186177997152, 96.00771837577625, 96.62664329066673, 
101.18341767119807, 146.8853363673495, 410.1816366085092, 124.77453645710723], 
"eval_len": [26, 31, 27, 22, 19, 19, 20, 28, 75, 24]}

 19%|█▉        | 189997/1000000 [3:08:30<10:13:44, 22.00it/s]global step 190000, trans_decision ep_re 148.67380710029693

{"global_step": 190000, "eval_re": [140.54850023469362, 312.88534309286666, 
89.91620423583512, 89.24208742291074, 287.22975315494847, 101.35677178817214, 
102.02680964189838, 131.62559684087336, 101.13157013650587, 130.7754344542647], 
"eval_len": [27, 60, 18, 18, 57, 20, 20, 25, 20, 25]}

 20%|█▉        | 199999/1000000 [3:18:40<10:03:29, 22.09it/s]global step 200000, trans_decision ep_re 169.8783183133923

{"global_step": 200000, "eval_re": [528.0168032849027, 129.26792016632146, 
134.7007150661243, 120.91689060470257, 147.8437096300482, 118.30304835714287, 
149.96545316700997, 90.49485198119989, 155.9323876574286, 123.34140321904229], 
"eval_len": [98, 25, 26, 24, 29, 23, 29, 18, 30, 24]}

 21%|██        | 209999/1000000 [3:28:50<9:54:29, 22.15it/s]global step 210000, trans_decision ep_re 162.11718647175366

{"global_step": 210000, "eval_re": [133.77734733940437, 339.7089804508772, 
259.041009281873, 117.09924720332697, 101.82672713420703, 89.5166325271384, 
159.35210688096882, 118.39586399801716, 173.26293373956958, 129.191016162154], 
"eval_len": [26, 62, 51, 23, 20, 18, 31, 23, 33, 25]}

 22%|██▏       | 219998/1000000 [3:38:52<9:39:32, 22.43it/s]global step 220000, trans_decision ep_re 134.96463541482044

{"global_step": 220000, "eval_re": [95.75094239152017, 104.8175209778732, 
89.31429064940212, 90.38020909179683, 411.32062512312433, 96.3771524775673, 
150.04961211016445, 125.37400287535706, 96.38801070882187, 89.87398774257701], 
"eval_len": [19, 21, 18, 18, 79, 19, 29, 24, 19, 18]}

 23%|██▎       | 229999/1000000 [3:49:20<9:41:40, 22.06it/s]global step 230000, trans_decision ep_re 199.58629502405313

{"global_step": 230000, "eval_re": [175.63957639795152, 431.7034644414543, 
90.09749677347011, 95.43502042618674, 317.6781178525356, 388.2085607300479, 
145.2369950701012, 112.31815237560795, 108.49767245974643, 131.0478937134296], 
"eval_len": [34, 97, 18, 19, 57, 73, 28, 22, 21, 25]}

 24%|██▍       | 239999/1000000 [3:59:30<9:25:52, 22.38it/s]global step 240000, trans_decision ep_re 157.6126363388976

{"global_step": 240000, "eval_re": [133.5601162046202, 89.52150985925832, 
95.61699515062645, 362.3724284989977, 149.9681211243915, 89.7885765919172, 
172.2621065423963, 118.25793223436597, 159.43015051198577, 205.3484266704165], 
"eval_len": [26, 18, 19, 65, 29, 18, 33, 23, 30, 39]}

 25%|██▍       | 249999/1000000 [4:09:40<9:20:47, 22.29it/s]global step 250000, trans_decision ep_re 276.4869338726007

{"global_step": 250000, "eval_re": [381.9283347856311, 154.8998691338042, 
168.47847845122524, 102.00158756949425, 506.3155734387339, 159.23498179591354, 
491.52015039116395, 112.0038460156601, 552.9173379788732, 135.56917916550782], 
"eval_len": [72, 30, 33, 20, 94, 30, 92, 22, 113, 26]}

 26%|██▌       | 259999/1000000 [4:19:50<9:15:20, 22.21it/s]global step 260000, trans_decision ep_re 201.17458132260572

{"global_step": 260000, "eval_re": [120.9017387727455, 89.92082851664392, 
151.43384858949213, 130.39795061137562, 119.55751087670265, 147.8571806065166, 
512.3804160002262, 505.4666142105183, 138.45825919517006, 95.37146584666604], 
"eval_len": [23, 18, 29, 26, 23, 29, 107, 93, 29, 19]}

 27%|██▋       | 269997/1000000 [4:30:00<9:02:59, 22.41it/s]global step 270000, trans_decision ep_re 173.1420648215392

{"global_step": 270000, "eval_re": [128.5942582535007, 108.06616043403312, 
90.992795173906, 96.01998397066193, 119.43665918904092, 84.37141567674558, 
112.94331706025262, 398.74697333151124, 467.9018605738694, 124.3472245518703], 
"eval_len": [25, 21, 18, 19, 23, 17, 22, 72, 87, 24]}

 28%|██▊       | 279997/1000000 [4:39:52<8:56:39, 22.36it/s]global step 280000, trans_decision ep_re 193.89398081438566

{"global_step": 280000, "eval_re": [359.512643699009, 96.6355291303712, 
327.08765921167264, 90.21718179018659, 126.23981805778077, 152.46282161773038, 
138.89022551631115, 95.67429629516172, 462.2805314794649, 89.9391013461679], 
"eval_len": [78, 19, 69, 18, 24, 29, 27, 19, 94, 18]}

 29%|██▉       | 289998/1000000 [4:50:01<8:46:37, 22.47it/s]global step 290000, trans_decision ep_re 208.9853803847115

{"global_step": 290000, "eval_re": [512.7343780454721, 141.46151882373206, 
112.06411661803519, 259.5170455304287, 167.19976235453834, 359.8891684278277, 
107.88354142445995, 164.95554889085045, 105.61544506081259, 158.533278670958], 
"eval_len": [99, 27, 22, 49, 33, 74, 21, 32, 21, 31]}

 30%|██▉       | 299999/1000000 [5:00:20<8:42:26, 22.33it/s]global step 300000, trans_decision ep_re 209.2136798303024

{"global_step": 300000, "eval_re": [96.2632379588309, 114.18547580673565, 
320.627732315177, 117.37062853139933, 175.66063665649295, 411.08829663871467, 
113.79056529980443, 124.42416986402534, 500.3279170608215, 118.39813817102234], 
"eval_len": [19, 22, 59, 23, 34, 74, 22, 24, 93, 23]}

 31%|███       | 309999/1000000 [5:10:30<8:38:14, 22.19it/s]global step 310000, trans_decision ep_re 117.48189020129105

{"global_step": 310000, "eval_re": [149.8941336396652, 89.3297894618972, 
112.29122333913489, 106.5888918198592, 126.20973559515151, 123.88826654694645, 
105.13644665714331, 125.58516358617004, 124.75795104727267, 111.13730031967005],
"eval_len": [29, 18, 22, 21, 25, 24, 21, 24, 24, 22]}

 32%|███▏      | 319999/1000000 [5:20:40<8:30:58, 22.18it/s]global step 320000, trans_decision ep_re 173.8855537941038

{"global_step": 320000, "eval_re": [332.69641479970136, 111.01043025803614, 
95.5482566391312, 435.12778063624796, 109.31197538817115, 124.97032674440844, 
95.96326286517905, 89.84793486003207, 141.03798705926516, 203.34116869086554], 
"eval_len": [65, 22, 19, 94, 21, 24, 19, 18, 27, 39]}

 33%|███▎      | 329999/1000000 [5:30:50<8:22:42, 22.21it/s]global step 330000, trans_decision ep_re 142.87435100879108

{"global_step": 330000, "eval_re": [118.95574153169846, 142.90925623429828, 
192.9304665156211, 102.12553604213922, 125.58778108123839, 131.57560977764854, 
281.01449350060216, 114.60262146588636, 129.0702575514027, 89.97174638737582], 
"eval_len": [23, 28, 37, 20, 24, 26, 52, 22, 25, 18]}

 34%|███▍      | 339999/1000000 [5:41:00<8:12:55, 22.32it/s]global step 340000, trans_decision ep_re 177.47478827685316

{"global_step": 340000, "eval_re": [392.83611962704924, 352.2644984943489, 
107.04821199422767, 113.83591054693274, 159.4040326278231, 138.9617728466717, 
156.5447108311678, 101.75229913357197, 112.78994749741, 139.31037916932831], 
"eval_len": [75, 68, 21, 22, 31, 27, 30, 20, 22, 27]}

 35%|███▍      | 349998/1000000 [5:50:51<7:59:12, 22.61it/s]global step 350000, trans_decision ep_re 187.4222221085784

{"global_step": 350000, "eval_re": [119.06460165903664, 119.39734697244874, 
323.2142685016953, 108.63574452597499, 102.47855338723564, 102.69097683831802, 
378.1502163729122, 377.4130335795069, 152.65646914491535, 90.52101010374031], 
"eval_len": [23, 23, 59, 21, 20, 20, 72, 73, 29, 18]}

 36%|███▌      | 359999/1000000 [6:01:00<7:57:50, 22.32it/s]global step 360000, trans_decision ep_re 180.76433805088197

{"global_step": 360000, "eval_re": [95.88224805501235, 441.07279534293366, 
127.60955911726164, 134.27289211484472, 103.22427745647705, 135.3918066467337, 
250.57324481702233, 224.00921411553472, 152.26312003366002, 143.3442228093394], 
"eval_len": [19, 88, 25, 26, 20, 26, 50, 43, 29, 28]}

 37%|███▋      | 369999/1000000 [6:11:20<7:56:27, 22.04it/s]global step 370000, trans_decision ep_re 142.65811495413098

{"global_step": 370000, "eval_re": [160.55053504189817, 115.01797108274707, 
89.22562875514004, 95.70115114742752, 101.74622963655065, 89.39028108715792, 
101.95972656380138, 473.2677290593554, 108.7971370632986, 90.92476010393291], 
"eval_len": [31, 23, 18, 19, 20, 18, 20, 90, 21, 18]}

 38%|███▊      | 379999/1000000 [6:21:30<7:45:53, 22.18it/s]global step 380000, trans_decision ep_re 198.483865031497

{"global_step": 380000, "eval_re": [271.2885614883551, 133.1633065147374, 
223.19835442572602, 470.5837562335775, 127.34140481689025, 113.70663708008885, 
101.78720457050264, 334.92988876934055, 119.05199555581747, 89.78754085993414], 
"eval_len": [54, 26, 42, 87, 25, 22, 20, 62, 23, 18]}

 39%|███▉      | 389999/1000000 [6:31:40<7:39:58, 22.10it/s]global step 390000, trans_decision ep_re 165.05531575413949

{"global_step": 390000, "eval_re": [107.4481039610434, 160.83688507586248, 
108.17604791731776, 148.9421465740258, 187.0285566514072, 118.77937266116203, 
101.25160508676538, 502.56410959010424, 108.41547002570043, 107.11085999800594],
"eval_len": [21, 31, 21, 29, 36, 23, 20, 95, 21, 21]}

 40%|███▉      | 399999/1000000 [6:41:50<7:31:27, 22.15it/s]global step 400000, trans_decision ep_re 173.18208176382862

{"global_step": 400000, "eval_re": [129.94111218639262, 107.1385632034429, 
202.37918220754014, 102.27011476595962, 148.74357441502335, 508.2333796075064, 
96.9581871413241, 109.4305272256748, 118.70844881344678, 208.01772807197545], 
"eval_len": [25, 21, 39, 20, 29, 97, 19, 21, 23, 39]}

 41%|████      | 409997/1000000 [6:52:00<7:20:20, 22.33it/s]global step 410000, trans_decision ep_re 131.59207444158696

{"global_step": 410000, "eval_re": [112.67092078948338, 101.97533937585845, 
96.16146331336138, 101.63272634876907, 318.34331746058416, 151.90413356266242, 
102.03475793160521, 112.09896280028588, 107.40542578052298, 111.69369705273654],
"eval_len": [22, 20, 19, 20, 62, 29, 20, 22, 21, 22]}

 42%|████▏     | 419997/1000000 [7:01:52<7:18:21, 22.05it/s]global step 420000, trans_decision ep_re 152.08769330551058

{"global_step": 420000, "eval_re": [95.49434324886039, 230.3263667465, 
340.46781039105525, 107.45336119193007, 154.79965027683846, 132.03869576140463, 
103.50048741891852, 89.21149626452345, 160.32413639485688, 107.2605853602182], 
"eval_len": [19, 45, 61, 21, 30, 25, 20, 18, 31, 21]}

 43%|████▎     | 429998/1000000 [7:12:01<7:07:01, 22.25it/s]global step 430000, trans_decision ep_re 135.04408208593622

{"global_step": 430000, "eval_re": [113.07557758415311, 108.31891814423781, 
96.72720771968895, 117.45715346308319, 126.36618841647949, 89.64783491374102, 
313.34596715307765, 84.31903364623354, 129.35259167793726, 171.83034814073014], 
"eval_len": [22, 21, 19, 23, 24, 18, 59, 17, 25, 33]}

 44%|████▍     | 439999/1000000 [7:22:20<7:01:39, 22.13it/s]global step 440000, trans_decision ep_re 144.24860234171263

{"global_step": 440000, "eval_re": [102.501633669235, 154.0259712932859, 
160.40336109832973, 279.5597090656687, 89.71933285105783, 143.743484814403, 
96.31912547487472, 113.0288826386782, 102.82155120526646, 200.362971306327], 
"eval_len": [20, 30, 31, 52, 18, 28, 19, 22, 20, 39]}

 45%|████▍     | 449999/1000000 [7:32:30<6:55:07, 22.08it/s]global step 450000, trans_decision ep_re 234.81498839411628

{"global_step": 450000, "eval_re": [543.3674132076835, 133.2635987772525, 
128.87010476809866, 354.5246479516952, 337.34957116344566, 127.91503972620282, 
357.8483988887265, 89.86263669331785, 163.7638341152517, 111.38463864948844], 
"eval_len": [102, 26, 25, 65, 62, 25, 68, 18, 31, 22]}

 46%|████▌     | 459999/1000000 [7:42:40<6:45:41, 22.18it/s]global step 460000, trans_decision ep_re 160.2316166230468

{"global_step": 460000, "eval_re": [96.26544527672174, 430.71856448043474, 
189.68026870568417, 113.21295133388969, 129.21878837259334, 132.60191376340768, 
166.21221014530767, 112.63473119606181, 101.33612648821108, 130.4351664681557], 
"eval_len": [19, 86, 36, 22, 25, 26, 32, 22, 20, 26]}

 47%|████▋     | 469999/1000000 [7:52:50<6:41:03, 22.02it/s]global step 470000, trans_decision ep_re 137.13362527238672

{"global_step": 470000, "eval_re": [102.19759409615845, 122.45245392036983, 
168.6838971517965, 341.49646807042586, 96.70588647532313, 102.56113892452055, 
89.3051626692935, 96.23223342845485, 128.03670812265068, 123.66470986487386], 
"eval_len": [20, 24, 33, 62, 19, 20, 18, 19, 25, 24]}

 48%|████▊     | 479999/1000000 [8:02:42<6:30:18, 22.20it/s]global step 480000, trans_decision ep_re 183.07469350688854

{"global_step": 480000, "eval_re": [397.15174197681137, 108.00169144779478, 
97.25435167039731, 106.5745255694426, 118.5680795904081, 97.06183462174212, 
139.24651587211758, 161.14942137064813, 447.2382690853629, 158.50050386416035], 
"eval_len": [74, 21, 19, 21, 23, 19, 27, 31, 82, 30]}

 49%|████▉     | 489999/1000000 [8:13:00<6:20:13, 22.36it/s]global step 490000, trans_decision ep_re 231.92184008582598

{"global_step": 490000, "eval_re": [431.7111092363737, 102.43053374169745, 
168.68204085529246, 336.0052758970252, 95.78160448618532, 128.1724894306462, 
161.23170724461417, 159.90961757637135, 387.8137081669518, 347.48031422310214], 
"eval_len": [80, 20, 32, 64, 19, 26, 31, 32, 72, 64]}

 50%|████▉     | 499999/1000000 [8:23:10<6:14:25, 22.26it/s]global step 500000, trans_decision ep_re 142.0247351133269

{"global_step": 500000, "eval_re": [90.37177172743243, 340.6417050737066, 
216.79340399835885, 89.31109716689203, 89.60285367327641, 112.86034571130382, 
160.9066146612318, 140.31872247811506, 90.01788212533677, 89.42295451761512], 
"eval_len": [18, 70, 41, 18, 18, 22, 31, 27, 18, 18]}

 51%|█████     | 509999/1000000 [8:33:20<6:04:33, 22.40it/s]global step 510000, trans_decision ep_re 126.21985903476354

{"global_step": 510000, "eval_re": [111.41962886920805, 106.71258834743615, 
113.91615139075327, 84.52291104033283, 96.90439238857427, 97.01343040344946, 
309.83622649229, 120.54847196529991, 107.32182996401959, 114.00295948627203], 
"eval_len": [22, 21, 22, 17, 19, 19, 58, 23, 21, 22]}

 52%|█████▏    | 519998/1000000 [8:43:10<5:55:23, 22.51it/s]global step 520000, trans_decision ep_re 181.09680467531058

{"global_step": 520000, "eval_re": [360.5766663475943, 125.18750321274769, 
373.35898513290925, 129.5381628533121, 134.41953809875827, 108.54019100604427, 
154.50601144953237, 133.89552437384515, 151.96331883086765, 138.9821454474947], 
"eval_len": [69, 25, 70, 25, 26, 21, 31, 26, 29, 27]}

 53%|█████▎    | 529999/1000000 [8:53:30<5:51:42, 22.27it/s]global step 530000, trans_decision ep_re 247.24512951694828

{"global_step": 530000, "eval_re": [256.41932421848554, 116.3239193289532, 
216.33829264469037, 128.6300430806076, 348.8562023661993, 587.6156543146196, 
420.2266251735028, 107.62545080105548, 148.21308281521942, 142.20270042614928], 
"eval_len": [48, 23, 41, 25, 66, 126, 79, 21, 28, 27]}

 54%|█████▍    | 539999/1000000 [9:03:40<5:43:58, 22.29it/s]global step 540000, trans_decision ep_re 130.04921461704748

{"global_step": 540000, "eval_re": [131.00567910257448, 181.44739702385664, 
150.78714418170279, 131.4771654233217, 100.90183724895054, 141.11449404230433, 
96.49758768194945, 121.93695633255673, 143.01746795134252, 102.30641718191586], 
"eval_len": [25, 35, 29, 25, 20, 27, 19, 24, 28, 20]}

 55%|█████▍    | 549999/1000000 [9:13:50<5:36:41, 22.28it/s]global step 550000, trans_decision ep_re 174.409925220326

{"global_step": 550000, "eval_re": [259.62879363323555, 128.42337504103642, 
127.43121734825206, 315.3362902957252, 163.09866327474225, 146.38157712061243, 
152.19037299307797, 158.74235905842784, 141.5173898006294, 151.34921363752082], 
"eval_len": [54, 25, 25, 60, 32, 29, 29, 30, 27, 29]}

 56%|█████▌    | 559997/1000000 [9:24:00<5:29:51, 22.23it/s]global step 560000, trans_decision ep_re 209.56095235802772

{"global_step": 560000, "eval_re": [475.69894915042, 108.8836574608797, 
349.2444848664918, 100.55088531398434, 136.4508075651621, 428.3337950010287, 
103.58766449365045, 112.37308157146796, 151.96417513067826, 128.5220230265136], 
"eval_len": [86, 21, 66, 20, 26, 78, 20, 22, 29, 25]}

 57%|█████▋    | 569999/1000000 [9:33:53<5:20:44, 22.34it/s]global step 570000, trans_decision ep_re 155.1035563781361

{"global_step": 570000, "eval_re": [195.2514275808795, 97.17420577558984, 
157.35240352367663, 96.14219207744564, 108.51173576739693, 141.46094568793475, 
120.00437351401918, 89.9075631050747, 399.9584429823616, 145.27227376698204], 
"eval_len": [38, 19, 30, 19, 21, 27, 23, 18, 76, 28]}

 58%|█████▊    | 579999/1000000 [9:44:10<5:10:58, 22.51it/s]global step 580000, trans_decision ep_re 149.51629296549737

{"global_step": 580000, "eval_re": [90.272572274789, 134.68784591275815, 
330.70376275155513, 163.76274482109162, 107.10706697374506, 211.5701334723109, 
131.39664458934942, 113.15350218441776, 105.60769623906968, 106.90096043588716],
"eval_len": [18, 26, 62, 32, 21, 41, 25, 22, 21, 21]}

 59%|█████▉    | 589999/1000000 [9:54:00<5:03:27, 22.52it/s]global step 590000, trans_decision ep_re 139.58835803419933

{"global_step": 590000, "eval_re": [105.24044597831553, 101.84084345562529, 
113.86428006959953, 158.66577505499657, 89.17336342360775, 106.4724120156032, 
111.3914944373699, 283.22108049834895, 208.51840116404517, 117.49548424448153], 
"eval_len": [21, 20, 22, 30, 18, 21, 22, 56, 40, 23]}

 60%|█████▉    | 599999/1000000 [10:04:20<4:58:09, 22.36it/s]global step 600000, trans_decision ep_re 159.65074398446546

{"global_step": 600000, "eval_re": [126.81405930014242, 167.4181624821591, 
124.7070796516184, 222.90411543630435, 96.25876202537412, 84.3626902049534, 
356.94414489501605, 123.1244371968186, 174.29187595564056, 119.68211269662766], 
"eval_len": [25, 34, 24, 46, 19, 17, 67, 24, 33, 23]}

 61%|██████    | 609999/1000000 [10:14:20<4:51:23, 22.31it/s]global step 610000, trans_decision ep_re 172.6262918327635

{"global_step": 610000, "eval_re": [118.6805286495524, 91.28656875517677, 
144.00337933380416, 303.04029081323375, 214.2908863600433, 102.58461058812745, 
322.59326539873496, 178.09780314278765, 145.15546007901096, 106.53012520716375],
"eval_len": [23, 18, 28, 59, 42, 20, 60, 34, 28, 21]}

 62%|██████▏   | 619998/1000000 [10:24:12<4:37:53, 22.79it/s]global step 620000, trans_decision ep_re 133.41197370315663

{"global_step": 620000, "eval_re": [183.91064001796644, 100.79846428289912, 
97.43356584599562, 178.90055835629096, 152.76204238710537, 89.22261516730751, 
117.5610462341775, 117.92561709083316, 89.69264835860513, 205.91253929038544], 
"eval_len": [36, 20, 19, 34, 29, 18, 23, 23, 18, 39]}

 63%|██████▎   | 629999/1000000 [10:34:30<4:36:13, 22.32it/s]global step 630000, trans_decision ep_re 140.0088103520042

{"global_step": 630000, "eval_re": [124.49918858034515, 143.382260592662, 
120.08581161506794, 100.0045009640664, 112.54034065733498, 113.08185974351092, 
102.50206509607665, 133.36176712897293, 347.9657002446862, 102.66460889731894], 
"eval_len": [24, 28, 23, 20, 22, 22, 20, 26, 64, 20]}

 64%|██████▍   | 639998/1000000 [10:44:21<4:23:56, 22.73it/s]global step 640000, trans_decision ep_re 169.17338862768295

{"global_step": 640000, "eval_re": [172.30016902612067, 146.2937538096337, 
119.68767957594709, 106.64855602497879, 197.94534096058243, 168.87287697561635, 
186.88638074272032, 107.47498594995128, 384.07454438429556, 101.54959882698346],
"eval_len": [34, 29, 23, 21, 38, 32, 36, 21, 71, 20]}

 65%|██████▍   | 649999/1000000 [10:54:40<4:19:33, 22.47it/s]global step 650000, trans_decision ep_re 183.28398954780164

{"global_step": 650000, "eval_re": [108.21247060376888, 138.5097016431054, 
382.3395363165852, 124.84013363150153, 109.42509975339428, 120.1981360470588, 
201.4570368028547, 152.78704487074523, 340.9171003520552, 154.15363545694717], 
"eval_len": [21, 28, 71, 24, 21, 23, 42, 30, 63, 30]}

 66%|██████▌   | 659998/1000000 [11:04:31<4:09:59, 22.67it/s]global step 660000, trans_decision ep_re 120.6225528767618

{"global_step": 660000, "eval_re": [181.551725465516, 141.02497991284275, 
125.01167629712145, 97.24427183552561, 139.45999442189088, 101.73131945784024, 
97.08291316801476, 96.32507904042615, 136.35699840173862, 90.4365707667017], 
"eval_len": [36, 28, 24, 19, 27, 20, 19, 19, 28, 18]}

 67%|██████▋   | 669999/1000000 [11:14:50<4:05:34, 22.40it/s]global step 670000, trans_decision ep_re 127.11492731118112

{"global_step": 670000, "eval_re": [96.48395114553489, 102.53550654558813, 
90.30153886137796, 185.05835575151409, 141.60952771985743, 108.42098680905382, 
163.73650532128656, 124.45149939266174, 168.36042546724823, 90.19097609768855], 
"eval_len": [19, 20, 18, 36, 27, 21, 31, 24, 32, 18]}

 68%|██████▊   | 679998/1000000 [11:24:41<3:55:59, 22.60it/s]global step 680000, trans_decision ep_re 173.8773039261096

{"global_step": 680000, "eval_re": [144.70958223122233, 118.385394261025, 
132.26316394378463, 384.0003904585232, 310.47398324460175, 153.16967406121552, 
84.2403650916725, 101.128556994414, 201.35480886093052, 109.04712011370661], 
"eval_len": [28, 23, 26, 72, 59, 30, 17, 20, 39, 21]}

 69%|██████▉   | 689999/1000000 [11:35:00<3:52:02, 22.27it/s]global step 690000, trans_decision ep_re 228.07077514513307

{"global_step": 690000, "eval_re": [194.3609947952024, 700.4389037756081, 
84.22304102248472, 101.90857387394537, 131.72279370091405, 113.18126739117956, 
124.75421008853525, 338.49880658818495, 353.42931843758896, 138.18984177768738],
"eval_len": [39, 131, 17, 20, 26, 22, 24, 61, 76, 27]}

 70%|██████▉   | 699997/1000000 [11:44:51<3:44:53, 22.23it/s]global step 700000, trans_decision ep_re 167.48465004791905

{"global_step": 700000, "eval_re": [149.92331860442118, 95.55640080609466, 
333.79382886450554, 96.42445756691598, 144.67479678385035, 107.42593116764355, 
136.77186869096755, 134.0419552693472, 176.9430136008532, 299.29092912459134], 
"eval_len": [29, 19, 65, 19, 28, 21, 26, 26, 37, 53]}

 71%|███████   | 709999/1000000 [11:55:10<3:36:36, 22.31it/s]global step 710000, trans_decision ep_re 153.85548616459306

{"global_step": 710000, "eval_re": [145.84592311522448, 111.0703472531995, 
167.40111952145475, 403.2762032698181, 118.97145395360951, 110.49211723931165, 
151.6328057590474, 108.72140075341851, 102.25547045922917, 118.88802032161733], 
"eval_len": [28, 22, 33, 74, 23, 22, 29, 21, 20, 23]}

 72%|███████▏  | 719999/1000000 [12:05:10<3:27:22, 22.50it/s]global step 720000, trans_decision ep_re 170.20778401677032

{"global_step": 720000, "eval_re": [193.83446022998817, 84.27133852405123, 
351.37250154746505, 168.2486774140078, 131.67502287259094, 286.77272982624254, 
129.6287210563976, 118.97532016413064, 148.09060300712594, 89.20846552570333], 
"eval_len": [36, 17, 63, 32, 26, 56, 25, 23, 29, 18]}

 73%|███████▎  | 729999/1000000 [12:15:20<3:21:23, 22.34it/s]global step 730000, trans_decision ep_re 165.81147311643352

{"global_step": 730000, "eval_re": [134.50658144414274, 347.99974074898864, 
113.83530875981232, 89.7446701847561, 101.94247290432119, 151.41509597692115, 
90.15986023684086, 165.08774610115103, 106.88899720544298, 356.53425760195796], 
"eval_len": [26, 63, 22, 18, 20, 29, 18, 32, 21, 72]}

 74%|███████▍  | 739998/1000000 [12:25:11<3:11:50, 22.59it/s]global step 740000, trans_decision ep_re 202.26563376201938

{"global_step": 740000, "eval_re": [483.9899894384275, 369.0302347776942, 
177.30639388937703, 114.14001399610117, 120.54843273032024, 173.8705039483544, 
131.19947032979476, 89.74002630767862, 219.170056984129, 143.66121521831693], 
"eval_len": [98, 65, 34, 22, 23, 33, 26, 18, 42, 29]}

 75%|███████▍  | 749999/1000000 [12:35:30<3:06:07, 22.39it/s]global step 750000, trans_decision ep_re 180.11130400645857

{"global_step": 750000, "eval_re": [119.12252692070906, 91.33469135417664, 
139.18396588315426, 177.87298914154394, 120.92317670020171, 132.77002577844004, 
136.23367902606313, 95.89830486164308, 343.19721008477256, 444.5764703138811], 
"eval_len": [23, 18, 27, 34, 23, 26, 26, 19, 66, 86]}

 76%|███████▌  | 759999/1000000 [12:45:22<2:58:14, 22.44it/s]global step 760000, trans_decision ep_re 200.0978181046833

{"global_step": 760000, "eval_re": [315.91566487720013, 169.174591491425, 
611.9698094690926, 102.60468955987349, 90.84430330629395, 184.9493375975983, 
119.10296538699004, 97.07441750018707, 123.48543047419416, 185.85697138397802], 
"eval_len": [63, 33, 106, 20, 18, 36, 23, 19, 24, 36]}

 77%|███████▋  | 769999/1000000 [12:55:40<2:50:39, 22.46it/s]global step 770000, trans_decision ep_re 187.541020296407

{"global_step": 770000, "eval_re": [215.21316034228448, 150.333880366542, 
118.52230714478654, 129.35747490711023, 102.3916315098332, 245.4872753079282, 
327.8072156076356, 89.22417537548623, 119.65858269559415, 377.41449970686966], 
"eval_len": [41, 29, 23, 25, 20, 46, 61, 18, 23, 70]}

 78%|███████▊  | 779999/1000000 [13:05:50<2:44:27, 22.29it/s]global step 780000, trans_decision ep_re 251.4175360278476

{"global_step": 780000, "eval_re": [351.64513004024457, 467.3817095996423, 
126.0666013767873, 430.76487980167866, 123.79194291093633, 118.82505790530395, 
151.37760270872315, 120.69278080145439, 116.44550514182006, 507.18414999188514],
"eval_len": [64, 92, 24, 82, 24, 23, 29, 23, 23, 94]}

 79%|███████▉  | 789999/1000000 [13:15:50<2:35:59, 22.44it/s]global step 790000, trans_decision ep_re 220.44883275636116

{"global_step": 790000, "eval_re": [131.18135586362, 101.5367379461141, 
95.8083315887432, 360.6266389315846, 119.55866119767467, 482.07386245241787, 
144.5267052753304, 96.67547233768809, 145.8063679282858, 526.6941940421528], 
"eval_len": [25, 20, 19, 63, 23, 86, 28, 19, 29, 96]}

 80%|███████▉  | 799999/1000000 [13:26:00<2:28:35, 22.43it/s]global step 800000, trans_decision ep_re 153.02366563297784

{"global_step": 800000, "eval_re": [136.3211822567213, 160.19551281626994, 
113.39436811499102, 118.29369907479239, 107.20693824847069, 142.88207591027466, 
117.43298008803309, 95.59777449028313, 123.70291386204609, 415.20921146789595], 
"eval_len": [26, 31, 23, 23, 21, 27, 23, 19, 24, 89]}

 81%|████████  | 809999/1000000 [13:35:53<2:21:36, 22.36it/s]global step 810000, trans_decision ep_re 192.5139470625962

{"global_step": 810000, "eval_re": [329.85313635480605, 97.73922466916328, 
158.3386464905341, 121.45897212947922, 119.01559572931413, 376.89277317325633, 
101.99801910842784, 102.19287546064336, 127.47244471543839, 390.17778279489926],
"eval_len": [61, 19, 30, 24, 23, 80, 20, 20, 25, 73]}

 82%|████████▏ | 819999/1000000 [13:46:10<2:14:43, 22.27it/s]global step 820000, trans_decision ep_re 188.06477469677708

{"global_step": 820000, "eval_re": [118.93799430582538, 166.69678359356593, 
557.8434460386511, 175.72693027155444, 105.23312489175612, 120.66684423905947, 
238.83367408296272, 183.54406328682424, 110.65911372346842, 102.5057725341027], 
"eval_len": [23, 32, 101, 34, 21, 24, 47, 35, 22, 20]}

 83%|████████▎ | 829999/1000000 [13:56:20<2:07:20, 22.25it/s]global step 830000, trans_decision ep_re 177.17312504355786

{"global_step": 830000, "eval_re": [112.54995512878946, 102.00192613310232, 
96.69601582164519, 429.7492216308936, 347.72878231727066, 202.60763076412078, 
119.7157885649526, 135.20742664727322, 123.11739398806048, 102.35710943947025], 
"eval_len": [22, 20, 19, 78, 67, 39, 23, 26, 24, 20]}

 84%|████████▍ | 839998/1000000 [14:06:11<1:57:29, 22.70it/s]global step 840000, trans_decision ep_re 137.78332570768953

{"global_step": 840000, "eval_re": [96.35830121551739, 108.52621171667877, 
355.238454137184, 133.72167337770466, 102.2923245190891, 125.89178247032123, 
100.54363107961129, 129.15187118009712, 102.42288029898978, 123.68612708170201],
"eval_len": [19, 21, 67, 26, 20, 25, 20, 25, 20, 24]}

 85%|████████▍ | 849999/1000000 [14:16:30<1:52:08, 22.29it/s]global step 850000, trans_decision ep_re 121.24780004776748

{"global_step": 850000, "eval_re": [125.33068433267977, 152.71309942205514, 
144.90757079203928, 89.9163968738732, 114.11921567040528, 103.1915963851377, 
136.26614194504822, 102.26581940615564, 118.0544957069725, 125.71297994330793], 
"eval_len": [24, 29, 28, 18, 22, 20, 26, 20, 23, 25]}

 86%|████████▌ | 859997/1000000 [14:26:40<1:44:31, 22.32it/s]global step 860000, trans_decision ep_re 136.37041572419145

{"global_step": 860000, "eval_re": [136.4745987947724, 113.82977385475189, 
192.74816714512392, 151.40282882020483, 129.18538812543196, 89.3082844099767, 
127.80515004161226, 137.95889990424735, 195.6674850932468, 89.32358105254619], 
"eval_len": [26, 22, 37, 29, 25, 18, 25, 27, 37, 18]}

 87%|████████▋ | 869999/1000000 [14:36:31<1:37:15, 22.28it/s]global step 870000, trans_decision ep_re 171.89187748670145

{"global_step": 870000, "eval_re": [95.55803360446079, 348.3559111846693, 
96.12712795295992, 107.77142011676607, 377.18848016178384, 213.14909262611684, 
143.86986362677735, 128.7695685252802, 111.52947004827121, 96.59980701992887], 
"eval_len": [19, 70, 19, 21, 69, 40, 28, 26, 22, 19]}

 88%|████████▊ | 879999/1000000 [14:46:51<1:29:13, 22.42it/s]global step 880000, trans_decision ep_re 183.36681591863572

{"global_step": 880000, "eval_re": [107.36198883853606, 174.4139075351669, 
114.0270351819245, 128.35831470563753, 90.86855070365115, 642.9665208090331, 
129.68771313718426, 151.83964595413465, 187.03293205201734, 107.11155026907187],
"eval_len": [21, 34, 22, 25, 18, 121, 25, 30, 36, 21]}

 89%|████████▉ | 889999/1000000 [14:56:51<1:20:26, 22.79it/s]global step 890000, trans_decision ep_re 230.96678310208782

{"global_step": 890000, "eval_re": [564.0582539799645, 447.7175663630431, 
84.45320134060597, 108.79096284645735, 89.39313014042536, 199.08394165127424, 
384.50103037466084, 156.7745351655218, 178.48190042805672, 96.41330873086848], 
"eval_len": [111, 80, 17, 21, 18, 38, 72, 31, 34, 19]}

 90%|████████▉ | 899999/1000000 [15:06:43<1:13:56, 22.54it/s]global step 900000, trans_decision ep_re 133.20651293808487

{"global_step": 900000, "eval_re": [108.00885674289297, 135.71406343069643, 
130.95866334189333, 108.67099852373578, 142.44475878936856, 112.91329607910934, 
138.896419682947, 131.7844665464326, 138.18629170320375, 184.48731454056878], 
"eval_len": [21, 26, 25, 21, 27, 22, 27, 26, 27, 35]}

 91%|█████████ | 909999/1000000 [15:17:01<1:07:44, 22.14it/s]global step 910000, trans_decision ep_re 120.71655535641466

{"global_step": 910000, "eval_re": [238.558492279463, 96.21092475284392, 
97.28920333739725, 96.6959199017771, 95.7426689918885, 136.53067007913924, 
89.72587397179211, 133.69461379420403, 103.41957874927093, 119.29760770637073], 
"eval_len": [45, 19, 19, 19, 19, 29, 18, 26, 20, 23]}

 92%|█████████▏| 919999/1000000 [15:27:11<1:00:21, 22.09it/s]global step 920000, trans_decision ep_re 211.10727534058782

{"global_step": 920000, "eval_re": [315.5947149472941, 178.7303373196642, 
123.72953155723253, 95.96642287401681, 598.4412383988583, 134.34264114974263, 
283.4724651935217, 123.06738674219957, 112.94320115063114, 144.7848140727171], 
"eval_len": [57, 35, 24, 19, 107, 26, 59, 24, 22, 28]}

 93%|█████████▎| 929999/1000000 [15:37:11<52:36, 22.18it/s]global step 930000, trans_decision ep_re 187.92937551399456

{"global_step": 930000, "eval_re": [113.55012270265848, 310.8169482928939, 
119.36013812955713, 114.10156082724663, 442.71129453336954, 111.89501313947663, 
337.6190490166884, 90.24987615820967, 149.29079398320857, 89.69895835663638], 
"eval_len": [22, 60, 23, 22, 80, 22, 65, 18, 30, 18]}

 94%|█████████▍| 939998/1000000 [15:47:22<44:37, 22.41it/s]global step 940000, trans_decision ep_re 208.70066139779837

{"global_step": 940000, "eval_re": [255.95535781905676, 113.43500134891322, 
432.9407677290553, 332.78154808768363, 95.8654418741715, 155.12960915387808, 
356.5510482551582, 118.04577808913885, 102.38709541335926, 123.9149662075689], 
"eval_len": [51, 22, 84, 61, 19, 30, 65, 23, 20, 24]}

 95%|█████████▍| 949999/1000000 [15:57:51<37:32, 22.20it/s]global step 950000, trans_decision ep_re 160.1754692510513

{"global_step": 950000, "eval_re": [329.5741346404942, 107.53764045378806, 
128.68413748140074, 97.29790897202285, 165.49856151795717, 117.799286650763, 
102.31401305603107, 332.65676236302573, 106.7559777792084, 113.6362695958218], 
"eval_len": [63, 21, 25, 19, 32, 23, 20, 66, 21, 22]}

 96%|█████████▌| 959997/1000000 [16:07:43<30:09, 22.11it/s]global step 960000, trans_decision ep_re 193.89809040944422

{"global_step": 960000, "eval_re": [140.64026974170693, 148.6170337026063, 
112.63580441822087, 118.27261955358188, 139.4356540604529, 743.8857526302234, 
218.45823096968482, 106.69194665195529, 120.33017649240053, 90.0134158736093], 
"eval_len": [28, 29, 22, 23, 27, 139, 42, 21, 23, 18]}

 97%|█████████▋| 969999/1000000 [16:18:11<22:33, 22.16it/s]global step 970000, trans_decision ep_re 215.96303048328633

{"global_step": 970000, "eval_re": [113.84301622713289, 138.86732864623033, 
133.3500428036033, 120.46894124030005, 107.92708279925122, 458.8464520028264, 
108.56535838961992, 441.47250658850135, 105.86668495913372, 430.422891176264], 
"eval_len": [22, 27, 26, 23, 21, 82, 21, 82, 21, 83]}

 98%|█████████▊| 979998/1000000 [16:28:21<14:48, 22.52it/s]global step 980000, trans_decision ep_re 224.45559707932995

{"global_step": 980000, "eval_re": [318.53143863059563, 317.0797418674394, 
134.99003440650378, 367.5275511172147, 131.7040737545512, 113.59582530537087, 
123.82453482072798, 286.249819191324, 177.53555929037378, 273.51739240919795], 
"eval_len": [63, 66, 26, 72, 25, 22, 24, 57, 34, 57]}

 99%|█████████▉| 989999/1000000 [16:38:31<07:31, 22.16it/s]global step 990000, trans_decision ep_re 151.2962644666102

{"global_step": 990000, "eval_re": [317.2761326011247, 113.77937069050465, 
151.446186367666, 160.14881044564135, 144.3139748449993, 102.22804687347927, 
155.3000219503014, 120.54715142098644, 142.0295379189764, 105.89341155242255], 
"eval_len": [64, 22, 29, 31, 28, 20, 30, 24, 29, 21]}

100%|█████████▉| 999999/1000000 [16:48:41<00:00, 22.11it/s]global step 1000000, trans_decision ep_re 165.70622528836918

{"global_step": 1000000, "eval_re": [140.3395470620886, 188.435331525698, 
122.08971498947817, 148.48932457764815, 185.64170724603252, 89.18981835626057, 
89.77342580542901, 114.38566456263426, 146.4263442523748, 432.29137450604776], 
"eval_len": [27, 35, 24, 29, 36, 18, 18, 22, 29, 88]}

100%|██████████| 1000000/1000000 [16:48:46<00:00, 16.52it/s]
