
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.1
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [04:14<10:31:23, 26.13it/s]global step 10000, trans_decision ep_re 85.51858563703351

{"global_step": 10000, "eval_re": [154.0656449988122, 120.82324109731009, 
135.14861431500003, 24.70900957329203, 22.348828645764634, 108.50462008817064, 
92.28080929830922, 110.93043579672727, 68.32036853594695, 18.054284021002065], 
"eval_len": [97, 79, 85, 23, 26, 77, 73, 77, 53, 22]}

  2%|▏         | 19998/1000000 [12:42<10:20:32, 26.32it/s]global step 20000, trans_decision ep_re 68.86204451133679

{"global_step": 20000, "eval_re": [28.289350791034813, 98.45529432957716, 
17.50191819107675, 17.655585475342786, 70.7101275596763, 122.30593561663464, 
132.9197573137217, 107.48527547039087, 66.7828529511506, 26.514347414762394], 
"eval_len": [28, 68, 26, 27, 50, 100, 86, 66, 51, 26]}

  3%|▎         | 29999/1000000 [21:10<10:20:30, 26.05it/s]global step 30000, trans_decision ep_re 92.1720141511778

{"global_step": 30000, "eval_re": [28.143921521811816, 99.57166264186374, 
94.33153591673404, 15.473170586854788, 131.2586877730441, 68.62350511974489, 
122.63018780225026, 21.899580404514687, 198.13902212405196, 141.64886762090768],
"eval_len": [31, 69, 78, 20, 95, 53, 86, 29, 123, 86]}

  4%|▍         | 39999/1000000 [29:50<10:09:28, 26.25it/s]global step 40000, trans_decision ep_re 109.93238015768475

{"global_step": 40000, "eval_re": [120.00233394315431, 26.275407640948426, 
193.2769922163508, 176.6314147985745, 74.93202965945545, 118.9067357579526, 
19.665773612954773, 112.00654215475822, 110.05785162659772, 147.56872016610066],
"eval_len": [89, 28, 130, 124, 80, 93, 22, 102, 79, 98]}

  5%|▍         | 49999/1000000 [38:10<10:07:59, 26.04it/s]global step 50000, trans_decision ep_re 33.87035757613705

{"global_step": 50000, "eval_re": [31.54482891830186, 35.758602606548195, 
22.30782548821947, 27.920521063070133, 36.98282801756544, 17.80407821466461, 
103.82413998913698, 20.231201617186706, 25.63978685430789, 16.68976299236924], 
"eval_len": [35, 31, 30, 29, 33, 23, 92, 24, 29, 20]}

  6%|▌         | 59999/1000000 [46:50<9:58:20, 26.18it/s]global step 60000, trans_decision ep_re 65.79398481963447

{"global_step": 60000, "eval_re": [24.411202016094723, 93.43315764544373, 
92.69521989853708, 103.3670270319169, 26.761786148200336, 70.68963917479363, 
28.933806129530353, 125.70101932101731, 24.607014188371924, 67.33997664243873], 
"eval_len": [30, 60, 66, 72, 30, 50, 28, 94, 30, 55]}

  7%|▋         | 69999/1000000 [55:20<9:56:10, 26.00it/s]global step 70000, trans_decision ep_re 49.53027540356257

{"global_step": 70000, "eval_re": [17.84384957170588, 25.203052780912785, 
23.822697347072072, 71.41152981256721, 51.25944894355716, 102.39646184709608, 
82.93818647841051, 26.7782631159005, 74.70352000239465, 18.945744136008805], 
"eval_len": [25, 28, 29, 57, 44, 90, 54, 27, 63, 28]}

  8%|▊         | 79999/1000000 [1:03:50<9:47:56, 26.08it/s]global step 80000, trans_decision ep_re 30.475527040991654

{"global_step": 80000, "eval_re": [42.56823664954624, 27.13645226080785, 
27.86932819644939, 83.0544202318085, 18.941450170721463, 14.036860417114791, 
21.91838384532235, 17.36794322830248, 27.23058199591687, 24.631613413926555], 
"eval_len": [39, 27, 29, 54, 26, 17, 28, 25, 28, 27]}

  9%|▉         | 89997/1000000 [1:12:20<9:45:59, 25.88it/s]global step 90000, trans_decision ep_re 104.38289335126706

{"global_step": 90000, "eval_re": [99.15916454920288, 20.929042149834626, 
83.8127205926142, 158.31585317993745, 69.68089687391976, 24.143762124759522, 
166.71128282088353, 160.44438792562272, 69.23892477015303, 191.3928985257427], 
"eval_len": [62, 27, 65, 90, 60, 26, 103, 101, 57, 114]}

 10%|▉         | 99998/1000000 [1:20:35<9:32:15, 26.21it/s]global step 100000, trans_decision ep_re 72.85212262676153

{"global_step": 100000, "eval_re": [30.2228619963589, 75.13000413181042, 
112.53059972041997, 126.78475690602185, 139.41249231100085, 20.143050051375596, 
152.79152684947695, 26.426869173499938, 23.295636029489444, 21.78342909816126], 
"eval_len": [27, 56, 86, 83, 85, 21, 94, 29, 35, 24]}

 11%|█         | 109999/1000000 [1:29:05<9:30:45, 25.99it/s]global step 110000, trans_decision ep_re 94.65612361344935

{"global_step": 110000, "eval_re": [16.988170877587947, 16.92643447765624, 
84.96254476839256, 81.24757547875295, 90.21658288036483, 151.07217915576328, 
246.71707119778264, 134.57910254413974, 21.609688178100125, 102.24188657595329],
"eval_len": [23, 24, 64, 59, 60, 107, 142, 92, 26, 65]}

 12%|█▏        | 119999/1000000 [1:37:34<9:24:44, 25.97it/s]global step 120000, trans_decision ep_re 102.711747105115

{"global_step": 120000, "eval_re": [141.028560508508, 16.593945423117706, 
20.016923597359234, 113.94432387140336, 20.955524086286506, 149.71730699777285, 
23.269733238602328, 366.24936123116413, 24.390597861885812, 150.95119423505014],
"eval_len": [93, 20, 23, 72, 24, 97, 26, 198, 27, 94]}

 13%|█▎        | 129997/1000000 [1:46:04<9:17:38, 26.00it/s]global step 130000, trans_decision ep_re 91.4010753817169

{"global_step": 130000, "eval_re": [139.19819345029433, 160.02222059374293, 
22.129466676909896, 12.7498348150176, 253.25588951756126, 14.921506788475082, 
17.292287877022954, 13.904059649987115, 132.98990588951904, 147.54738855863874],
"eval_len": [95, 118, 26, 19, 153, 17, 19, 20, 94, 100]}

 14%|█▍        | 139997/1000000 [1:54:34<9:10:19, 26.04it/s]global step 140000, trans_decision ep_re 92.80667968192824

{"global_step": 140000, "eval_re": [123.57912480946703, 93.8428607892765, 
242.3828343679328, 21.643800524926654, 10.41628902918534, 21.574334673639424, 
17.875698548154705, 18.761145696115037, 181.23562133137514, 196.75508704920986],
"eval_len": [85, 65, 121, 28, 15, 22, 21, 26, 94, 122]}

 15%|█▍        | 149998/1000000 [2:03:03<8:57:45, 26.34it/s]global step 150000, trans_decision ep_re 104.08361546021357

{"global_step": 150000, "eval_re": [21.627708333586323, 285.4844764376016, 
139.00183523515864, 119.77110997122067, 77.38661210305791, 222.01165629045593, 
120.76764461444742, 18.70786174345736, 21.611578063386013, 14.465671809763666], 
"eval_len": [24, 129, 90, 77, 62, 121, 82, 19, 25, 21]}

 16%|█▌        | 159998/1000000 [2:11:33<8:48:42, 26.48it/s]global step 160000, trans_decision ep_re 90.48671245852134

{"global_step": 160000, "eval_re": [116.27170217227432, 24.9550497537769, 
124.1290861296717, 93.16714191990734, 110.75591157297458, 21.025543532123393, 
208.74097270237655, 171.4108217117131, 19.318447167625383, 15.092447922770079], 
"eval_len": [93, 27, 82, 64, 89, 27, 117, 118, 24, 17]}

 17%|█▋        | 169998/1000000 [2:20:03<8:42:54, 26.45it/s]global step 170000, trans_decision ep_re 111.54476535894825

{"global_step": 170000, "eval_re": [28.628069801941535, 26.877250438222433, 
106.78582800444386, 97.64473729965106, 29.58677978063276, 99.39846954511805, 
232.48650987990925, 26.577163687576377, 89.6848244217003, 377.778020730287], 
"eval_len": [27, 27, 71, 69, 29, 70, 126, 32, 68, 165]}

 18%|█▊        | 179998/1000000 [2:28:33<8:41:15, 26.22it/s]global step 180000, trans_decision ep_re 72.32400885793604

{"global_step": 180000, "eval_re": [193.52314195685048, 19.63326908152206, 
164.53919199523298, 29.29255935468921, 13.295342897362456, 19.767460249595754, 
96.19301556167053, 27.5720259753989, 140.05135719328604, 19.37272431375178], 
"eval_len": [116, 26, 87, 30, 20, 25, 61, 34, 88, 27]}

 19%|█▉        | 189998/1000000 [2:37:02<8:35:45, 26.17it/s]global step 190000, trans_decision ep_re 84.79072522701779

{"global_step": 190000, "eval_re": [99.10009280952067, 186.35374821867254, 
126.02872098656319, 14.282532852004689, 21.359979812656007, 79.45263812948843, 
13.316434513508739, 19.927663846468946, 118.7357022985202, 169.34973880277443], 
"eval_len": [67, 96, 89, 19, 26, 51, 19, 23, 73, 96]}

 20%|█▉        | 199999/1000000 [2:45:32<8:30:28, 26.12it/s]global step 200000, trans_decision ep_re 120.85664151139187

{"global_step": 200000, "eval_re": [159.14537577098298, 92.11321619276599, 
100.57109033293975, 18.072741296980602, 219.07419243561125, 14.620052945962899, 
156.87048594169227, 101.34762810975954, 229.06140058808057, 117.6902314991427], 
"eval_len": [90, 66, 67, 24, 133, 21, 94, 66, 127, 84]}

 21%|██        | 209998/1000000 [2:54:03<8:18:28, 26.41it/s]global step 210000, trans_decision ep_re 111.95223811565855

{"global_step": 210000, "eval_re": [126.90723333713336, 74.72417328722835, 
221.31236553408203, 17.90541678014185, 367.1358212277812, 131.57124875503317, 
124.93535917014349, 17.67337762652904, 17.92314479809685, 19.434240640416007], 
"eval_len": [82, 51, 115, 19, 170, 97, 78, 19, 19, 22]}

 22%|██▏       | 219998/1000000 [3:02:33<8:13:09, 26.36it/s]global step 220000, trans_decision ep_re 116.14667321659533

{"global_step": 220000, "eval_re": [144.7410677220966, 226.27150404205494, 
160.2740094733647, 27.88004272032679, 112.47775258096915, 27.832171774038656, 
17.764182405440685, 91.58399145694193, 259.38641920507325, 93.25559078564662], 
"eval_len": [96, 116, 90, 29, 72, 27, 29, 69, 127, 70]}

 23%|██▎       | 229999/1000000 [3:11:03<8:11:36, 26.10it/s]global step 230000, trans_decision ep_re 127.31165272682365

{"global_step": 230000, "eval_re": [136.8805585114737, 74.3754870582567, 
86.59975851662618, 192.98631510769553, 231.46768494076062, 168.7834186516123, 
20.577521758345135, 88.23658162403815, 82.62730169465851, 190.58189940476956], 
"eval_len": [97, 51, 66, 114, 119, 102, 23, 62, 59, 105]}

 24%|██▍       | 239999/1000000 [3:19:32<8:05:14, 26.10it/s]global step 240000, trans_decision ep_re 83.15949921296577

{"global_step": 240000, "eval_re": [24.935946517773644, 21.99317511444573, 
273.855101804306, 35.658442561790515, 25.158832599093667, 92.56079796403618, 
234.4065225854674, 22.840599308761796, 72.87454166620292, 27.31103200777973], 
"eval_len": [28, 29, 148, 34, 26, 56, 135, 28, 53, 28]}

 25%|██▍       | 249999/1000000 [3:28:02<7:57:44, 26.16it/s]global step 250000, trans_decision ep_re 88.33273584275182

{"global_step": 250000, "eval_re": [132.85092858904508, 169.47484182827185, 
24.150473622658883, 180.94047548045813, 77.65089181305501, 173.0638048918779, 
19.711706457485576, 72.96117473566284, 16.354505533610492, 16.168555475392356], 
"eval_len": [83, 115, 36, 117, 59, 100, 24, 52, 18, 21]}

 26%|██▌       | 259997/1000000 [3:36:31<7:52:30, 26.10it/s]global step 260000, trans_decision ep_re 109.92473724989426

{"global_step": 260000, "eval_re": [203.95973579886063, 157.983860431592, 
84.14320455763416, 156.03586605994406, 73.86373288546508, 13.61015569957328, 
19.77395300522015, 117.29628560975794, 147.464531025344, 125.11604742555132], 
"eval_len": [124, 98, 63, 103, 50, 19, 26, 80, 94, 85]}

 27%|██▋       | 269997/1000000 [3:45:02<7:48:50, 25.95it/s]global step 270000, trans_decision ep_re 112.51160398126777

{"global_step": 270000, "eval_re": [20.11417129947468, 108.35448701936615, 
23.655169003352498, 16.22660955397834, 143.5828440593616, 127.62851183176235, 
133.20727092003983, 374.1023892225806, 111.92082275895056, 66.32376414381079], 
"eval_len": [20, 85, 27, 23, 95, 107, 80, 189, 77, 47]}

 28%|██▊       | 279997/1000000 [3:53:32<7:40:21, 26.07it/s]global step 280000, trans_decision ep_re 171.99357280249995

{"global_step": 280000, "eval_re": [74.16602513389071, 184.46090920358958, 
442.56842354131464, 115.69558924282187, 170.1329757472937, 248.68806396618135, 
240.21127459708822, 86.94292246865349, 21.781771090515353, 135.2877730336504], 
"eval_len": [51, 97, 217, 87, 95, 125, 144, 64, 24, 73]}

 29%|██▉       | 289997/1000000 [4:02:02<7:33:13, 26.11it/s]global step 290000, trans_decision ep_re 111.0391680648573

{"global_step": 290000, "eval_re": [34.0258382753524, 20.07833684938036, 
189.54390755568383, 163.3832486685735, 17.721410503503954, 26.286580125821803, 
18.562650202284942, 225.2314582681873, 125.72065187016362, 289.83759832962136], 
"eval_len": [34, 26, 123, 94, 20, 26, 20, 145, 80, 143]}

 30%|██▉       | 299999/1000000 [4:10:33<7:30:12, 25.91it/s]global step 300000, trans_decision ep_re 98.73042845395204

{"global_step": 300000, "eval_re": [142.93289704076756, 138.38655188158833, 
146.05712179429534, 118.51997551246176, 104.27766173185456, 136.22004696136602, 
120.66574238800494, 24.043584939852657, 29.27055265755456, 26.93014963177467], 
"eval_len": [86, 80, 83, 78, 69, 89, 72, 27, 31, 26]}

 31%|███       | 309999/1000000 [4:19:02<7:22:52, 25.97it/s]global step 310000, trans_decision ep_re 113.54332172950126

{"global_step": 310000, "eval_re": [124.07613837246817, 285.18882973863816, 
14.407183210234148, 35.7320099338778, 34.090491414700324, 157.76341647612293, 
112.6901407013227, 95.80780952103629, 144.47399272047522, 131.2032052061369], 
"eval_len": [76, 153, 19, 34, 32, 96, 79, 66, 84, 92]}

 32%|███▏      | 319999/1000000 [4:27:32<7:15:39, 26.01it/s]global step 320000, trans_decision ep_re 181.6041905498974

{"global_step": 320000, "eval_re": [118.58329274894015, 177.63425554330973, 
317.88729555405484, 105.40779176096439, 166.83388204143168, 135.37222231674167, 
154.0989526858318, 218.88125487314895, 182.89642718637623, 238.44653078817436], 
"eval_len": [91, 115, 145, 89, 107, 118, 97, 119, 92, 126]}

 33%|███▎      | 329998/1000000 [4:36:03<7:02:40, 26.42it/s]global step 330000, trans_decision ep_re 153.3245433245949

{"global_step": 330000, "eval_re": [18.430061688467674, 211.43781073305072, 
152.48663206764533, 147.287770369967, 221.32734246729805, 23.583609270972, 
178.81369000899585, 186.79426911554958, 285.0008598658685, 108.08338765813443], 
"eval_len": [20, 110, 94, 82, 122, 24, 107, 106, 132, 82]}

 34%|███▍      | 339997/1000000 [4:44:34<7:04:36, 25.91it/s]global step 340000, trans_decision ep_re 134.29362025928882

{"global_step": 340000, "eval_re": [141.06396698776786, 12.965167076019064, 
16.917498797874444, 91.11492113384834, 133.98634746312376, 160.28299561266178, 
137.41344100220766, 196.10616533902214, 67.75307961955939, 385.33261956080366], 
"eval_len": [85, 18, 20, 75, 82, 94, 109, 113, 44, 172]}

 35%|███▍      | 349997/1000000 [4:53:20<6:56:49, 25.99it/s]global step 350000, trans_decision ep_re 114.73918574553215

{"global_step": 350000, "eval_re": [172.55100984485404, 26.275939638285696, 
135.5798324590702, 20.46087165766, 14.876556076740775, 124.54421216691128, 
15.277522106598441, 241.77564303900735, 158.01676830279672, 238.03350216339715],
"eval_len": [103, 26, 89, 30, 21, 81, 25, 118, 90, 132]}

 36%|███▌      | 359998/1000000 [5:01:35<6:44:16, 26.38it/s]global step 360000, trans_decision ep_re 62.79622846362814

{"global_step": 360000, "eval_re": [22.769155942973633, 26.052649262012025, 
21.877468159971524, 15.803097159093777, 116.69158495111698, 33.55036050705677, 
23.92848018103762, 122.92033679624734, 228.17247232397304, 16.196679352798693], 
"eval_len": [25, 32, 26, 19, 75, 34, 26, 73, 119, 22]}

 37%|███▋      | 369997/1000000 [5:10:04<6:45:04, 25.92it/s]global step 370000, trans_decision ep_re 78.33317813524405

{"global_step": 370000, "eval_re": [15.697670654818525, 92.84339963621431, 
83.5048966163495, 157.69329499972997, 25.57149671626041, 20.83232748673489, 
74.0875455957602, 124.27648589316097, 164.99223399244934, 23.83242976096243], 
"eval_len": [21, 65, 59, 92, 28, 27, 52, 81, 92, 28]}

 38%|███▊      | 379998/1000000 [5:18:33<6:35:27, 26.13it/s]global step 380000, trans_decision ep_re 79.61613750756673

{"global_step": 380000, "eval_re": [19.034596541938278, 26.89157159525355, 
175.9474260523044, 20.167411495598966, 18.220519081190382, 195.03862800560333, 
90.01803639633458, 72.00200306202841, 90.87927903795179, 87.96190380746361], 
"eval_len": [21, 25, 90, 24, 26, 108, 61, 48, 68, 65]}

 39%|███▉      | 389999/1000000 [5:27:03<6:31:32, 25.97it/s]global step 390000, trans_decision ep_re 84.92501227552495

{"global_step": 390000, "eval_re": [181.34191346664713, 26.573088410527667, 
72.29203535670766, 18.067119638849544, 150.76853962639757, 24.260393974431125, 
15.740329998400078, 177.96131559598822, 28.16483851007777, 154.0805481772227], 
"eval_len": [97, 35, 50, 22, 90, 33, 25, 107, 27, 81]}

 40%|███▉      | 399997/1000000 [5:35:32<6:25:07, 25.97it/s]global step 400000, trans_decision ep_re 83.81754283667135

{"global_step": 400000, "eval_re": [19.32333146812096, 12.216081819850936, 
234.46166529570772, 69.91730487045777, 21.283085218555826, 28.24150693813006, 
122.54835012580132, 226.65932026849183, 15.900252592278902, 87.62452976931819], 
"eval_len": [22, 15, 142, 45, 31, 29, 83, 114, 21, 63]}

 41%|████      | 409997/1000000 [5:44:02<6:18:08, 26.00it/s]global step 410000, trans_decision ep_re 89.2903996207443

{"global_step": 410000, "eval_re": [14.775385129110184, 160.09324268102063, 
31.267189645843573, 18.296306559796218, 159.7914467966367, 233.2134294909195, 
24.663678202064386, 17.344678909865742, 144.44963592380546, 89.00900286838065], 
"eval_len": [26, 103, 30, 20, 127, 143, 30, 20, 103, 65]}

 42%|████▏     | 419997/1000000 [5:52:32<6:09:32, 26.16it/s]global step 420000, trans_decision ep_re 111.03871747404742

{"global_step": 420000, "eval_re": [121.57842450907559, 295.0576654384246, 
16.83825116000259, 20.1165164242776, 144.40578220354448, 152.2220771076562, 
27.72528134890832, 125.14064208039437, 21.07824835457059, 186.2242861136198], 
"eval_len": [83, 139, 18, 22, 89, 82, 34, 87, 26, 113]}

 43%|████▎     | 429997/1000000 [6:01:02<6:05:44, 25.97it/s]global step 430000, trans_decision ep_re 77.40692868118525

{"global_step": 430000, "eval_re": [24.871620165387306, 88.97634314415922, 
20.6945198138903, 16.526302789210714, 25.237184475088306, 19.914613388819784, 
19.58696389291948, 165.8734261551957, 153.93033121527745, 238.4579817719043], 
"eval_len": [30, 67, 25, 25, 26, 26, 26, 97, 89, 129]}

 44%|████▍     | 439997/1000000 [6:09:32<6:02:00, 25.78it/s]global step 440000, trans_decision ep_re 78.9200649227599

{"global_step": 440000, "eval_re": [29.885686821935092, 25.859892272743934, 
288.57684931855755, 30.74088419082111, 244.98390223069543, 19.363426533960006, 
25.38357716992499, 29.109272618074463, 74.8458740925563, 20.45128397833019], 
"eval_len": [30, 27, 136, 43, 118, 32, 26, 32, 72, 22]}

 45%|████▍     | 449997/1000000 [6:18:01<5:55:41, 25.77it/s]global step 450000, trans_decision ep_re 84.57409927676821

{"global_step": 450000, "eval_re": [216.74898269711048, 17.222599757984597, 
22.171143260209693, 145.21274501550008, 111.82789978523773, 24.388731069804233, 
105.81982314095049, 19.58409647677064, 150.97801224567243, 31.786959318441724], 
"eval_len": [110, 24, 28, 89, 80, 25, 74, 25, 94, 33]}

 46%|████▌     | 459998/1000000 [6:26:31<5:40:17, 26.45it/s]global step 460000, trans_decision ep_re 103.75728599429624

{"global_step": 460000, "eval_re": [156.12722068285063, 27.380675852482167, 
100.26386407648407, 18.360171799798124, 134.32830101345476, 187.62476838129842, 
34.15761858101397, 21.245419349809577, 332.79499562254773, 25.289824583223062], 
"eval_len": [115, 30, 87, 25, 85, 109, 40, 28, 165, 26]}

 47%|████▋     | 469997/1000000 [6:35:02<5:39:41, 26.00it/s]global step 470000, trans_decision ep_re 97.18044482874964

{"global_step": 470000, "eval_re": [247.93120604993925, 19.274803232825274, 
202.58577369779994, 123.35515044450624, 25.075558201803936, 164.85302880616058, 
33.47468095382174, 17.394345070972854, 111.07946516534133, 26.780436664325237], 
"eval_len": [128, 20, 104, 101, 30, 107, 32, 21, 99, 31]}

 48%|████▊     | 479999/1000000 [6:43:33<5:33:03, 26.02it/s]global step 480000, trans_decision ep_re 154.40609641448754

{"global_step": 480000, "eval_re": [241.1994685104207, 227.0071635324812, 
121.02479151601057, 20.144385531894205, 143.46070106639948, 155.9474592389298, 
115.88255598407228, 82.01226542643504, 369.8744097608193, 67.50776357741266], 
"eval_len": [125, 133, 81, 22, 83, 103, 75, 60, 191, 46]}

 49%|████▉     | 489998/1000000 [6:52:04<5:21:36, 26.43it/s]global step 490000, trans_decision ep_re 124.48701695866605

{"global_step": 490000, "eval_re": [11.229896329778601, 157.27691478674066, 
37.27382275966918, 26.906536486862574, 142.78218134241052, 156.10458709704835, 
353.21915928400955, 129.9732496961084, 201.62167306136098, 28.482148742671782], 
"eval_len": [19, 93, 34, 28, 86, 88, 162, 82, 105, 31]}

 50%|████▉     | 499997/1000000 [7:00:34<5:18:10, 26.19it/s]global step 500000, trans_decision ep_re 61.30465097053756

{"global_step": 500000, "eval_re": [18.10503444833265, 14.306627758835905, 
35.211519597948545, 35.87319133998477, 20.894030259148757, 146.4770463094819, 
156.4731943966349, 84.43529454206885, 22.97009110177761, 78.30047995116168], 
"eval_len": [19, 20, 43, 34, 28, 85, 89, 72, 27, 72]}

 51%|█████     | 509998/1000000 [7:09:03<5:09:07, 26.42it/s]global step 510000, trans_decision ep_re 69.11591992097296

{"global_step": 510000, "eval_re": [23.238879061254845, 67.73068365243012, 
85.47137378113707, 110.78762980965814, 13.352064703663887, 85.07270484100277, 
28.888792809738145, 231.2011611027308, 22.678149269642326, 22.7377601784714], 
"eval_len": [28, 47, 61, 82, 16, 61, 26, 132, 29, 31]}

 52%|█████▏    | 519999/1000000 [7:17:33<5:07:24, 26.02it/s]global step 520000, trans_decision ep_re 79.97306066906415

{"global_step": 520000, "eval_re": [17.30129130699911, 217.04263943054679, 
116.2619151050574, 22.684082070764923, 170.84461255667338, 32.46465534454594, 
143.36942842198917, 27.129867474829183, 20.370006429454204, 32.26210854978153], 
"eval_len": [23, 113, 88, 22, 100, 42, 84, 25, 23, 29]}

 53%|█████▎    | 529999/1000000 [7:26:04<5:01:34, 25.98it/s]global step 530000, trans_decision ep_re 78.18121196645707

{"global_step": 530000, "eval_re": [300.1934977776202, 179.56763091048413, 
30.069647075069238, 21.572414011654466, 36.6983310638565, 18.67715520788497, 
139.92880042747646, 20.078392089074278, 15.17792090468242, 19.84833019676797], 
"eval_len": [138, 103, 33, 21, 34, 21, 88, 25, 19, 29]}

 54%|█████▍    | 539997/1000000 [7:34:32<4:54:21, 26.05it/s]global step 540000, trans_decision ep_re 129.55736173999927

{"global_step": 540000, "eval_re": [86.69916421370954, 113.57802073558696, 
270.91457643381085, 301.59903848805436, 15.586119887476652, 23.70724918184801, 
199.53355256455734, 227.51084960819279, 27.60316519779211, 28.84188108896415], 
"eval_len": [63, 80, 125, 141, 19, 30, 114, 122, 31, 28]}

 55%|█████▍    | 549999/1000000 [7:43:10<4:47:05, 26.12it/s]global step 550000, trans_decision ep_re 95.53698654646624

{"global_step": 550000, "eval_re": [249.44344448608123, 172.933780001161, 
22.806192473315274, 24.73336797988136, 121.45701130612498, 109.40252865520738, 
75.26023361047488, 12.646116308616264, 148.73673545331113, 17.950455190488977], 
"eval_len": [152, 95, 24, 29, 78, 73, 54, 18, 97, 20]}

 56%|█████▌    | 559999/1000000 [7:51:40<4:40:33, 26.14it/s]global step 560000, trans_decision ep_re 77.6578392953639

{"global_step": 560000, "eval_re": [175.33479465454997, 123.84540072841082, 
21.86013959414214, 22.900357109439195, 99.17829723545317, 117.84917700388122, 
79.07703716833105, 101.37789433404869, 12.832015642505633, 22.32327948287703], 
"eval_len": [103, 93, 29, 25, 78, 83, 52, 73, 18, 25]}

 57%|█████▋    | 569997/1000000 [7:59:55<4:32:58, 26.25it/s]global step 570000, trans_decision ep_re 114.51607076635192

{"global_step": 570000, "eval_re": [25.678559352621132, 296.58268924950943, 
33.28385366476226, 156.5766336155284, 27.766264130419714, 160.02117553887277, 
95.12210079494302, 17.30588473384126, 31.79504573339417, 301.028500849627], 
"eval_len": [27, 144, 33, 98, 31, 86, 69, 23, 29, 142]}

 58%|█████▊    | 579998/1000000 [8:08:22<4:24:40, 26.45it/s]global step 580000, trans_decision ep_re 97.04734281290658

{"global_step": 580000, "eval_re": [29.314081506032675, 17.016268073905895, 
263.9677929946268, 187.54077110227658, 77.57295169268124, 23.633199050217556, 
153.55160829657686, 23.675856639474837, 173.22260510741697, 20.978293665856498],
"eval_len": [31, 21, 121, 123, 54, 25, 96, 28, 102, 23]}

 59%|█████▉    | 589999/1000000 [8:17:00<4:20:41, 26.21it/s]global step 590000, trans_decision ep_re 85.22817797070367

{"global_step": 590000, "eval_re": [149.21610200425545, 226.3974020208439, 
22.1569267855544, 212.54004068149513, 32.860890325353296, 24.284619302259365, 
12.454672021739423, 17.393032175667475, 20.766205586850354, 134.21188880301796],
"eval_len": [89, 121, 31, 118, 32, 27, 17, 23, 25, 83]}

 60%|█████▉    | 599999/1000000 [8:25:30<4:14:01, 26.24it/s]global step 600000, trans_decision ep_re 77.25750259447629

{"global_step": 600000, "eval_re": [25.990183022504116, 20.41697387328361, 
61.83218824002671, 257.52119864953283, 106.9362105899843, 16.66509715854325, 
29.100944768251264, 27.540052753876672, 209.16987111797016, 17.40230577079003], 
"eval_len": [29, 27, 43, 139, 78, 21, 32, 29, 136, 20]}

 61%|██████    | 609997/1000000 [8:33:45<4:07:54, 26.22it/s]global step 610000, trans_decision ep_re 70.23736713044138

{"global_step": 610000, "eval_re": [181.44384198902992, 25.5405706483522, 
107.59850658479941, 27.281471761809623, 23.160689098046163, 18.638119213947668, 
158.02498760398888, 26.097147856386968, 106.53910495746628, 28.049231590586725],
"eval_len": [93, 29, 75, 30, 22, 23, 94, 30, 75, 30]}

 62%|██████▏   | 619998/1000000 [8:42:12<4:00:17, 26.36it/s]global step 620000, trans_decision ep_re 63.6560512587978

{"global_step": 620000, "eval_re": [176.34093542912532, 24.76705971121942, 
27.445438619308774, 249.24516500386991, 21.96870000683973, 22.26454246804762, 
25.88921085571456, 26.309152718728846, 38.37488997213369, 23.95541780299012], 
"eval_len": [99, 32, 29, 119, 26, 24, 27, 30, 35, 23]}

 63%|██████▎   | 629999/1000000 [8:50:50<3:54:22, 26.31it/s]global step 630000, trans_decision ep_re 53.035383127438635

{"global_step": 630000, "eval_re": [111.76869780079612, 19.3613262129944, 
31.498033226471115, 19.910328354583992, 35.76974457750812, 167.6979360091134, 
21.08827703858787, 84.56507182190926, 23.155345511498318, 15.539070720923723], 
"eval_len": [70, 26, 33, 25, 33, 116, 24, 63, 27, 17]}

 64%|██████▍   | 639999/1000000 [8:59:20<3:49:22, 26.16it/s]global step 640000, trans_decision ep_re 79.65154879413726

{"global_step": 640000, "eval_re": [34.47696494082511, 166.67791175421098, 
155.38540627007868, 14.606785883943509, 24.6763796641466, 141.0334975057003, 
175.42296264933327, 29.271258600861774, 25.12187389137681, 29.842446780895393], 
"eval_len": [33, 122, 126, 19, 31, 97, 125, 30, 30, 28]}

 65%|██████▍   | 649999/1000000 [9:07:33<3:41:56, 26.28it/s]global step 650000, trans_decision ep_re 134.26385386941735

{"global_step": 650000, "eval_re": [259.3864650656696, 22.71179315300009, 
115.34353188891734, 157.0890193000649, 91.62659629611943, 152.1576284519724, 
89.83327504346725, 137.26080986863775, 174.54452714178453, 142.68489248454003], 
"eval_len": [121, 23, 67, 90, 58, 101, 66, 93, 107, 84]}

 66%|██████▌   | 659997/1000000 [9:16:01<3:36:26, 26.18it/s]global step 660000, trans_decision ep_re 47.565711189107816

{"global_step": 660000, "eval_re": [44.74818260134066, 51.77415268171116, 
19.651940185244012, 117.32072489807281, 25.92678402816565, 16.06652576781129, 
17.99851865303199, 129.18575138798192, 27.934991485669894, 25.049540202048792], 
"eval_len": [45, 53, 25, 68, 30, 26, 23, 80, 28, 31]}

 67%|██████▋   | 669999/1000000 [9:24:40<3:30:45, 26.10it/s]global step 670000, trans_decision ep_re 86.92391359097793

{"global_step": 670000, "eval_re": [21.912187986908407, 26.18627389568465, 
159.401041924066, 109.67099532729307, 114.02131994342778, 24.5510453290189, 
180.13582458051692, 30.36724730878396, 184.94411805018413, 18.049081563895474], 
"eval_len": [26, 24, 90, 73, 73, 26, 111, 33, 105, 27]}

 68%|██████▊   | 679999/1000000 [9:32:55<3:24:30, 26.08it/s]global step 680000, trans_decision ep_re 37.83176853541945

{"global_step": 680000, "eval_re": [74.84205622018908, 72.59624078941546, 
18.01858095526957, 18.250359500187724, 20.033438041985736, 73.7803516137221, 
28.957326935592334, 24.109947227085424, 27.341881534730927, 20.387502536016154],
"eval_len": [58, 69, 19, 22, 28, 63, 36, 24, 32, 20]}

 69%|██████▉   | 689998/1000000 [9:41:22<3:14:36, 26.55it/s]global step 690000, trans_decision ep_re 79.87195408319182

{"global_step": 690000, "eval_re": [32.073597072976156, 92.08884015354366, 
121.81575390877292, 107.63518756740659, 17.548344950927273, 26.729361412378413, 
32.34916785387252, 174.9024928697373, 16.935885882488453, 176.64090915981507], 
"eval_len": [32, 61, 79, 69, 22, 27, 32, 113, 18, 105]}

 70%|██████▉   | 699999/1000000 [9:50:00<3:10:34, 26.24it/s]global step 700000, trans_decision ep_re 119.78126081913305

{"global_step": 700000, "eval_re": [126.64083806135544, 335.9417177869745, 
61.81121785630022, 111.06841379127356, 88.79678155757232, 17.09510684705165, 
22.783195993715847, 144.9311789326929, 121.48089556000542, 167.26326180438875], 
"eval_len": [88, 164, 43, 79, 67, 19, 27, 89, 81, 102]}

 71%|███████   | 709999/1000000 [9:58:30<3:03:35, 26.33it/s]global step 710000, trans_decision ep_re 121.2856921679008

{"global_step": 710000, "eval_re": [135.7935818561493, 222.98631842150877, 
46.15908895364052, 198.9875288237161, 73.36345572748442, 21.247352808864907, 
80.54720962582773, 131.6189832888546, 277.63091349077513, 24.522488682186886], 
"eval_len": [91, 124, 40, 106, 51, 23, 61, 94, 124, 29]}

 72%|███████▏  | 719999/1000000 [10:06:44<2:56:56, 26.37it/s]global step 720000, trans_decision ep_re 95.82121413737005

{"global_step": 720000, "eval_re": [86.57876875453442, 175.8526014980959, 
72.29254256136059, 136.81172260185522, 36.58350497103291, 178.9288500285846, 
152.78594822268352, 70.52362909868239, 32.630220813303055, 15.224352823567699], 
"eval_len": [69, 95, 53, 82, 35, 97, 95, 47, 30, 23]}

 73%|███████▎  | 729999/1000000 [10:15:20<2:51:41, 26.21it/s]global step 730000, trans_decision ep_re 86.41459431689006

{"global_step": 730000, "eval_re": [23.821995323324803, 115.05449378207658, 
205.73129061749012, 24.674780437739535, 92.37381248182325, 43.419203050427804, 
130.29599001823314, 38.670566074363244, 171.227512770909, 18.87629861251301], 
"eval_len": [25, 84, 154, 25, 65, 49, 95, 45, 142, 30]}

 74%|███████▍  | 739999/1000000 [10:23:50<2:46:17, 26.06it/s]global step 740000, trans_decision ep_re 98.77226595285208

{"global_step": 740000, "eval_re": [101.08743845423011, 14.907100501363356, 
125.69753954010356, 96.11206932715075, 14.16404138476371, 206.54975439030795, 
163.67409928116197, 36.3057794583228, 157.8892737627832, 71.33556342833343], 
"eval_len": [62, 18, 78, 71, 18, 132, 109, 37, 97, 55]}

 75%|███████▍  | 749997/1000000 [10:32:04<2:38:43, 26.25it/s]global step 750000, trans_decision ep_re 73.01620617329996

{"global_step": 750000, "eval_re": [69.51358336249898, 200.20671488349657, 
18.680806840736054, 97.02217812216868, 111.26109352974392, 22.581882128789132, 
16.02769687141223, 154.77924224475663, 18.34650335208262, 21.74236039731491], 
"eval_len": [80, 115, 20, 69, 73, 28, 18, 105, 23, 24]}

 76%|███████▌  | 759999/1000000 [10:40:40<2:32:18, 26.26it/s]global step 760000, trans_decision ep_re 69.5315223311891

{"global_step": 760000, "eval_re": [140.9934077553883, 39.46050350999088, 
242.7332808898644, 23.588599183781092, 57.735295259509684, 33.7062795228023, 
71.13638923750064, 36.54284296392963, 21.66082139970745, 27.75780358941665], 
"eval_len": [93, 37, 136, 27, 47, 30, 50, 32, 34, 30]}

 77%|███████▋  | 769999/1000000 [10:48:55<2:25:48, 26.29it/s]global step 770000, trans_decision ep_re 97.95961904189252

{"global_step": 770000, "eval_re": [115.85476186834433, 45.25424869450299, 
15.040825149657469, 254.46088623210096, 20.26367770737888, 112.84035915776231, 
132.77858024007912, 113.43141000107408, 54.76958526936198, 114.90185609866306], 
"eval_len": [89, 48, 21, 133, 24, 94, 81, 93, 47, 80]}

 78%|███████▊  | 779999/1000000 [10:57:20<2:18:31, 26.47it/s]global step 780000, trans_decision ep_re 58.02850996161776

{"global_step": 780000, "eval_re": [21.280706926781775, 70.86907808498498, 
16.740107258855307, 83.41597745224516, 22.438747513541585, 145.12269555165608, 
24.976244102164713, 69.37104995208232, 103.99713798641251, 22.07335478745308], 
"eval_len": [21, 51, 19, 73, 28, 95, 28, 46, 85, 23]}

 79%|███████▉  | 789998/1000000 [11:05:46<2:12:17, 26.46it/s]global step 790000, trans_decision ep_re 45.78245830511905

{"global_step": 790000, "eval_re": [25.317483218092114, 21.415626091267857, 
147.8124055710485, 23.06031416339827, 20.400982949586897, 28.566394080820515, 
17.41764202592655, 113.7443206711589, 33.505886911766964, 26.583527368123953], 
"eval_len": [27, 24, 82, 32, 31, 34, 19, 90, 46, 41]}

 80%|███████▉  | 799998/1000000 [11:14:12<2:05:05, 26.65it/s]global step 800000, trans_decision ep_re 107.31079035189657

{"global_step": 800000, "eval_re": [124.63155033247368, 15.743630281791448, 
446.64778022838965, 81.16076449139723, 24.530356777009633, 21.000360775956405, 
90.69226342997847, 21.801864401659977, 23.50969586057727, 223.3896369397321], 
"eval_len": [84, 25, 190, 52, 28, 27, 62, 25, 27, 123]}

 81%|████████  | 809999/1000000 [11:22:50<1:59:57, 26.40it/s]global step 810000, trans_decision ep_re 50.18102775669727

{"global_step": 810000, "eval_re": [105.05762232732047, 30.733593333969786, 
113.73773514411373, 22.682524735284886, 21.21580600134551, 24.473822524400827, 
22.31819637031209, 17.445944164902315, 15.446573086682301, 128.69845987864076], 
"eval_len": [62, 31, 73, 28, 26, 28, 25, 24, 18, 101]}

 82%|████████▏ | 819997/1000000 [11:31:04<1:54:16, 26.25it/s]global step 820000, trans_decision ep_re 105.03381756756573

{"global_step": 820000, "eval_re": [76.59748068435523, 281.82451760102026, 
159.75736565241738, 19.473611784902122, 24.846684693886754, 274.1292421679161, 
18.788670434260336, 15.84689690351322, 17.24080550721908, 161.83290024616682], 
"eval_len": [53, 147, 111, 26, 26, 131, 22, 22, 24, 92]}

 83%|████████▎ | 829999/1000000 [11:39:40<1:48:15, 26.17it/s]global step 830000, trans_decision ep_re 94.48880805065048

{"global_step": 830000, "eval_re": [128.06542948037347, 148.76169556236923, 
139.9146060588443, 25.217022853564885, 98.74046759652006, 84.60531685168415, 
102.31048493116758, 34.37043423352949, 16.461668546062658, 166.44095439238893], 
"eval_len": [90, 87, 86, 26, 70, 75, 75, 37, 28, 110]}

 84%|████████▍ | 839999/1000000 [11:47:56<1:41:27, 26.28it/s]global step 840000, trans_decision ep_re 67.46886853465587

{"global_step": 840000, "eval_re": [26.662716285731797, 18.76156924288864, 
16.294299518330075, 22.538564909255836, 109.60632570412638, 106.40275965238592, 
300.376736316135, 29.718033974934517, 26.348978586213235, 17.978701156557182], 
"eval_len": [30, 28, 22, 30, 86, 77, 131, 41, 32, 27]}

 85%|████████▍ | 849997/1000000 [11:56:40<1:37:05, 25.75it/s]global step 850000, trans_decision ep_re 75.38857316794838

{"global_step": 850000, "eval_re": [40.75581648079536, 439.7250200382531, 
43.350442158604494, 19.807667672665684, 41.756872217309, 25.118623781187015, 
34.81328763627929, 42.44804241210286, 24.597811029150538, 41.51214825313646], 
"eval_len": [41, 186, 47, 30, 41, 25, 34, 42, 24, 39]}

 86%|████████▌ | 859999/1000000 [12:05:00<1:29:48, 25.98it/s]global step 860000, trans_decision ep_re 98.19022770667466

{"global_step": 860000, "eval_re": [82.8807410625696, 18.42608543680816, 
169.80749178711451, 27.71857815785508, 98.1973160007942, 142.19508746511983, 
115.38079321609749, 146.87071443494335, 31.07727718978357, 149.3481923156609], 
"eval_len": [61, 25, 89, 29, 69, 78, 77, 95, 30, 91]}

 87%|████████▋ | 869999/1000000 [12:13:35<1:24:29, 25.64it/s]global step 870000, trans_decision ep_re 123.4079474325733

{"global_step": 870000, "eval_re": [22.80370026539424, 21.752873732924115, 
175.4468913042215, 29.198991844975115, 100.12837480432823, 212.21867541896003, 
387.4814146134427, 113.49591361239045, 16.75175787162092, 154.8008808574757], 
"eval_len": [28, 24, 100, 27, 72, 106, 154, 84, 20, 95]}

 88%|████████▊ | 879998/1000000 [12:22:20<1:15:55, 26.34it/s]global step 880000, trans_decision ep_re 80.91643434210815

{"global_step": 880000, "eval_re": [23.31967222667887, 35.661245833453265, 
23.89076636552744, 271.5213730828298, 28.39828359247501, 216.10708990465795, 
40.641671452545594, 16.149381632768993, 138.01542315755506, 15.459436172589566],
"eval_len": [27, 41, 29, 132, 34, 117, 36, 19, 93, 18]}

 89%|████████▉ | 889998/1000000 [12:30:42<1:10:59, 25.83it/s]global step 890000, trans_decision ep_re 72.72951731700712

{"global_step": 890000, "eval_re": [16.571379528973818, 15.757537877117086, 
179.88403605046534, 85.76452526687578, 142.16784351046698, 14.618764744653493, 
87.84059771532765, 27.31320189682115, 135.97676099919295, 21.400525580176975], 
"eval_len": [19, 22, 103, 55, 79, 22, 61, 27, 96, 26]}

 90%|████████▉ | 899998/1000000 [12:39:30<1:03:40, 26.17it/s]global step 900000, trans_decision ep_re 89.80760488086501

{"global_step": 900000, "eval_re": [205.95105682825576, 41.27603178450489, 
14.996306530392147, 23.109101717517028, 56.27020148973234, 143.12779938914, 
100.26405890712752, 81.64830077706374, 96.22358692478319, 135.20960446013365], 
"eval_len": [121, 42, 17, 24, 58, 102, 71, 62, 65, 75]}

 91%|█████████ | 909997/1000000 [12:48:10<58:38, 25.58it/s]global step 910000, trans_decision ep_re 169.06629703559594

{"global_step": 910000, "eval_re": [311.78287132057693, 190.31228091900604, 
121.530102685081, 160.48354407410753, 19.87380293634086, 128.49304465740377, 
249.02572657709723, 221.9532051231394, 150.54376486717425, 136.66462719603243], 
"eval_len": [152, 109, 85, 101, 20, 92, 132, 135, 87, 83]}

 92%|█████████▏| 919998/1000000 [12:56:50<51:13, 26.03it/s]global step 920000, trans_decision ep_re 138.12807602068293

{"global_step": 920000, "eval_re": [255.42743896525155, 291.9030125549933, 
149.1284366156472, 125.22562977531948, 21.392654005973412, 21.963307550926434, 
16.1260137717618, 344.016247854343, 16.954672586312668, 139.14334652630038], 
"eval_len": [115, 129, 88, 78, 28, 22, 20, 151, 26, 90]}

 93%|█████████▎| 929999/1000000 [13:05:30<45:48, 25.47it/s]global step 930000, trans_decision ep_re 91.71719950326874

{"global_step": 930000, "eval_re": [12.701152386023097, 100.08433985046888, 
24.218134066998342, 16.757602638386484, 18.935787933369955, 253.8786887147334, 
187.75595137723704, 158.70894440947478, 118.7755044314374, 25.35588922455807], 
"eval_len": [17, 70, 26, 23, 21, 136, 113, 111, 80, 27]}

 94%|█████████▍| 939998/1000000 [13:14:10<38:35, 25.91it/s]global step 940000, trans_decision ep_re 39.05143872081354

{"global_step": 940000, "eval_re": [37.296110004958905, 17.5333763826978, 
19.94000521195457, 39.719663023151824, 23.078996039829374, 42.15297432472928, 
20.104282323958785, 132.0949180882346, 31.03232459970575, 27.561737208914515], 
"eval_len": [44, 23, 30, 43, 29, 44, 21, 89, 29, 28]}

 95%|█████████▍| 949998/1000000 [13:22:50<31:45, 26.24it/s]global step 950000, trans_decision ep_re 95.8616023262326

{"global_step": 950000, "eval_re": [18.459927468923745, 41.27553149628589, 
63.397448240077125, 278.81726233833496, 207.67267044017987, 43.41850872304898, 
24.699297406746233, 18.98013436897764, 207.92815849575834, 53.96708428399309], 
"eval_len": [24, 46, 59, 134, 113, 37, 29, 20, 104, 52]}

 96%|█████████▌| 959999/1000000 [13:31:30<25:58, 25.66it/s]global step 960000, trans_decision ep_re 57.48963179098995

{"global_step": 960000, "eval_re": [200.83953830717877, 18.71862684461848, 
72.07449782119673, 16.287858643100165, 39.88985467530203, 23.90779015430038, 
28.59263503255294, 26.447352697880632, 36.8328483105462, 111.30531542322318], 
"eval_len": [115, 23, 79, 22, 40, 27, 40, 31, 39, 75]}

 97%|█████████▋| 969999/1000000 [13:40:10<19:26, 25.71it/s]global step 970000, trans_decision ep_re 100.66284834220043

{"global_step": 970000, "eval_re": [43.333958530577775, 131.65038994112132, 
119.50521080702163, 110.60241164117703, 81.11800969310966, 143.2877634606329, 
61.55714615724059, 281.537611386456, 10.899095752267241, 23.136886052400207], 
"eval_len": [53, 81, 78, 77, 55, 104, 56, 130, 14, 24]}

 98%|█████████▊| 979998/1000000 [13:48:44<12:45, 26.12it/s]global step 980000, trans_decision ep_re 89.20125943012357

{"global_step": 980000, "eval_re": [92.1902256560455, 51.8950111971104, 
98.92956944381754, 14.07638293756153, 109.96575760413705, 51.77177327137852, 
61.93865508756632, 180.31328095555452, 193.27338309482795, 37.65855505323634], 
"eval_len": [72, 40, 68, 20, 76, 52, 59, 96, 100, 44]}

 99%|█████████▉| 989999/1000000 [13:57:40<06:37, 25.19it/s]global step 990000, trans_decision ep_re 74.56172811886529

{"global_step": 990000, "eval_re": [20.935125934479217, 45.88356974749852, 
17.891472020113312, 373.5592014987166, 11.924710578041678, 32.02013836981919, 
41.96126808877177, 44.88023508230252, 122.48943625752986, 34.07212361138021], 
"eval_len": [28, 47, 20, 164, 14, 37, 49, 42, 85, 38]}

100%|█████████▉| 999999/1000000 [14:06:10<00:00, 25.42it/s]global step 1000000, trans_decision ep_re 113.9190690897523

{"global_step": 1000000, "eval_re": [148.05150208833967, 161.95560561334472, 
108.44072186640774, 225.61007127490205, 26.934301081559635, 21.147461622065123, 
91.20019836630543, 215.0296529858002, 103.41233042128324, 37.40884557751512], 
"eval_len": [93, 94, 80, 108, 27, 23, 68, 111, 71, 38]}

100%|██████████| 1000000/1000000 [14:06:26<00:00, 19.69it/s]
