
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.25
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9998/1000000 [05:02<12:17:52, 22.36it/s]global step 10000, trans_decision ep_re 167.06001926827602

{"global_step": 10000, "eval_re": [101.07933543327546, 180.09330870270318, 
95.26514068137784, 155.20453907069717, 371.87328841506803, 128.69409669547557, 
90.2757094981181, 96.07339170669117, 261.9668657065233, 190.07451677283044], 
"eval_len": [20, 34, 19, 29, 71, 25, 18, 19, 50, 36]}

  2%|▏         | 19999/1000000 [15:30<12:10:12, 22.37it/s]global step 20000, trans_decision ep_re 191.72470862863275

{"global_step": 20000, "eval_re": [464.77807985472737, 96.37585895207172, 
122.87011912404665, 96.1295067446029, 328.91065252566676, 89.06130885314793, 
355.39151191765063, 119.22733013010941, 144.17144830598875, 100.33126987831558],
"eval_len": [85, 19, 24, 19, 61, 18, 74, 23, 28, 20]}

  3%|▎         | 29999/1000000 [25:20<12:06:12, 22.26it/s]global step 30000, trans_decision ep_re 150.4938554304593

{"global_step": 30000, "eval_re": [84.03388439121113, 203.5508853658059, 
89.21999736120289, 95.71127358279834, 84.0290942344149, 102.57691310545769, 
144.91252970476236, 163.64421807026784, 117.07966220494184, 420.1800962837299], 
"eval_len": [17, 38, 18, 19, 17, 20, 28, 32, 23, 76]}

  4%|▍         | 39999/1000000 [35:40<11:58:43, 22.26it/s]global step 40000, trans_decision ep_re 155.67292214632607

{"global_step": 40000, "eval_re": [136.7381433404405, 405.26549563623325, 
195.75040734481277, 151.50892946246333, 106.55114102511232, 90.04969734350675, 
140.94540939897104, 83.89070541109501, 89.32368378094445, 156.70560871968104], 
"eval_len": [26, 75, 38, 29, 21, 18, 27, 17, 18, 30]}

  5%|▍         | 49999/1000000 [45:50<11:56:37, 22.09it/s]global step 50000, trans_decision ep_re 154.32311831355057

{"global_step": 50000, "eval_re": [145.69857105882846, 100.54372926159662, 
84.06024844912027, 530.8314396562325, 129.58912874965154, 157.12952667470663, 
96.33437400587131, 95.18408923083648, 90.52017284461293, 113.33990320404888], 
"eval_len": [28, 20, 17, 103, 27, 30, 19, 19, 18, 22]}

  6%|▌         | 59999/1000000 [56:00<11:53:48, 21.95it/s]global step 60000, trans_decision ep_re 194.33435410916633

{"global_step": 60000, "eval_re": [401.18883152135476, 274.00041150304105, 
432.8096219429415, 90.34178355903602, 107.05304174318097, 117.99918631181366, 
132.88703227645672, 125.96199239155087, 105.72437917547315, 155.37726066681464],
"eval_len": [77, 51, 96, 18, 21, 23, 26, 25, 21, 30]}

  7%|▋         | 69997/1000000 [1:06:10<11:43:22, 22.04it/s]global step 70000, trans_decision ep_re 150.73044130390525

{"global_step": 70000, "eval_re": [100.8479463220787, 102.15171283079047, 
118.89993777487764, 113.10541426769782, 102.69670447828294, 152.30425965678168, 
90.17864753649582, 157.63769188795703, 84.02797048844789, 485.45412779564265], 
"eval_len": [20, 20, 23, 22, 20, 29, 18, 33, 17, 91]}

  8%|▊         | 79998/1000000 [1:16:01<11:16:35, 22.66it/s]global step 80000, trans_decision ep_re 166.6244418081714

{"global_step": 80000, "eval_re": [132.9656399496144, 96.20533607607649, 
111.27529197069606, 111.62733540356365, 226.68742241595055, 391.21564873993464, 
148.97651444591915, 190.72590035701987, 95.71518997879096, 160.85013874414855], 
"eval_len": [26, 19, 22, 22, 44, 77, 29, 36, 19, 31]}

  9%|▉         | 89999/1000000 [1:26:20<11:19:12, 22.33it/s]global step 90000, trans_decision ep_re 128.02783215834265

{"global_step": 90000, "eval_re": [135.74204601583483, 102.19593065284813, 
219.01677015255578, 101.97098522049016, 101.0519920255569, 125.45988198474964, 
102.82310227819053, 129.19866822788524, 112.33044113355439, 150.48850389176098],
"eval_len": [26, 20, 43, 20, 20, 24, 20, 26, 22, 29]}

 10%|▉         | 99999/1000000 [1:36:30<11:08:03, 22.45it/s]global step 100000, trans_decision ep_re 142.61334734769878

{"global_step": 100000, "eval_re": [143.38839095039546, 117.10227204523852, 
108.13113044848993, 107.81740349620077, 241.20791541787332, 101.87913023960195, 
129.8719922088219, 102.77292771565588, 249.43380780987482, 124.52850314483499], 
"eval_len": [29, 23, 22, 21, 48, 20, 25, 20, 50, 24]}

 11%|█         | 109998/1000000 [1:46:21<10:59:52, 22.48it/s]global step 110000, trans_decision ep_re 162.12933132971477

{"global_step": 110000, "eval_re": [107.5930089773715, 113.1707116876169, 
400.01417920525114, 143.7419830999805, 100.78630348010715, 95.03956111610168, 
154.37406470170274, 255.50467245554805, 160.62958131539966, 90.43924725806825], 
"eval_len": [21, 22, 82, 28, 20, 19, 31, 48, 31, 18]}

 12%|█▏        | 119999/1000000 [1:56:30<10:56:06, 22.35it/s]global step 120000, trans_decision ep_re 202.3829010666302

{"global_step": 120000, "eval_re": [129.10119160958433, 89.29792654513398, 
364.2696176539327, 190.10847508396472, 122.61514715699407, 193.8218659274475, 
163.3241554528237, 149.3375157868794, 151.9240850866284, 470.0290303629133], 
"eval_len": [25, 18, 68, 36, 24, 38, 33, 29, 29, 101]}

 13%|█▎        | 129999/1000000 [2:06:50<10:49:37, 22.32it/s]global step 130000, trans_decision ep_re 196.3080929248731

{"global_step": 130000, "eval_re": [210.17867577716322, 83.96662860502649, 
509.3371200300921, 134.77869979862442, 329.91444683343394, 95.86092217640218, 
108.42005589012281, 96.50263048177746, 298.1269945271723, 95.99475512891604], 
"eval_len": [43, 17, 114, 26, 64, 19, 21, 19, 57, 19]}

 14%|█▍        | 139999/1000000 [2:17:00<10:45:21, 22.21it/s]global step 140000, trans_decision ep_re 168.2176701681032

{"global_step": 140000, "eval_re": [89.67589167750353, 341.22650091379006, 
119.70138346951646, 201.1070436860669, 122.66188066068564, 154.21461409319554, 
150.4554442422635, 160.07534265002812, 196.53441567580188, 146.52418461218028], 
"eval_len": [18, 65, 23, 38, 24, 30, 29, 35, 38, 28]}

 15%|█▍        | 149999/1000000 [2:27:10<10:36:40, 22.25it/s]global step 150000, trans_decision ep_re 239.73731242083278

{"global_step": 150000, "eval_re": [101.94293635424869, 307.1666222367615, 
83.96311187097625, 102.12558015254467, 164.34576735884696, 589.5132294777253, 
113.48818444834333, 502.80950432768714, 129.65198539342012, 302.36620258777356],
"eval_len": [20, 61, 17, 20, 33, 118, 22, 92, 25, 61]}

 16%|█▌        | 159999/1000000 [2:37:10<10:24:35, 22.41it/s]global step 160000, trans_decision ep_re 153.43804709857332

{"global_step": 160000, "eval_re": [112.25397036851986, 119.2468842461061, 
142.69216705376044, 101.81951492301847, 107.50466438158809, 179.14793251878766, 
110.83996630362594, 96.07645138726292, 412.79073948187204, 152.00818032119145], 
"eval_len": [22, 23, 27, 20, 21, 36, 22, 19, 74, 29]}

 17%|█▋        | 169999/1000000 [2:47:20<10:16:43, 22.43it/s]global step 170000, trans_decision ep_re 162.74921231718173

{"global_step": 170000, "eval_re": [95.69218307248295, 443.12432456402, 
183.2723754588002, 157.4137256651599, 125.49385546466675, 151.96910078669222, 
169.61609276925734, 107.83752286425542, 108.39049423451571, 84.682448291967], 
"eval_len": [19, 84, 35, 33, 25, 30, 33, 21, 21, 17]}

 18%|█▊        | 179999/1000000 [2:57:10<10:10:26, 22.39it/s]global step 180000, trans_decision ep_re 137.0604740682855

{"global_step": 180000, "eval_re": [153.45617808769296, 89.74105049716036, 
109.41944338354497, 89.55176696502946, 135.80368174488007, 90.19689334454054, 
95.37261051689853, 125.84600638512823, 179.41359695243997, 301.8035128055399], 
"eval_len": [30, 18, 21, 18, 26, 18, 19, 24, 36, 57]}

 19%|█▉        | 189999/1000000 [3:07:30<10:03:36, 22.37it/s]global step 190000, trans_decision ep_re 185.92499927713695

{"global_step": 190000, "eval_re": [121.62623563815886, 362.43584308708614, 
107.18073367139232, 154.66134057384664, 106.43407048017868, 231.68997873367493, 
314.24565512535673, 118.02484972925627, 162.21059357662753, 180.74069215579152],
"eval_len": [24, 65, 21, 30, 21, 44, 58, 23, 31, 36]}

 20%|█▉        | 199998/1000000 [3:17:22<9:48:04, 22.67it/s]global step 200000, trans_decision ep_re 147.60772300912217

{"global_step": 200000, "eval_re": [95.70710482134999, 145.5222617893695, 
120.85832678732339, 118.00411053586978, 433.34510122296666, 83.86504478713489, 
112.75310125762252, 95.58782803331202, 96.0886098379057, 174.34574101836722], 
"eval_len": [19, 30, 23, 23, 80, 17, 22, 19, 19, 34]}

 21%|██        | 209999/1000000 [3:27:40<9:49:11, 22.35it/s]global step 210000, trans_decision ep_re 148.52571919905523

{"global_step": 210000, "eval_re": [163.07429743221928, 95.82046486110404, 
141.31477966521535, 96.62739874075149, 443.9024581700489, 96.97494343542355, 
121.22538026651222, 89.39867040681551, 125.60077080084757, 111.31802821161432], 
"eval_len": [31, 19, 28, 19, 88, 19, 24, 18, 24, 22]}

 22%|██▏       | 219998/1000000 [3:37:50<9:31:02, 22.77it/s]global step 220000, trans_decision ep_re 158.85871871074602

{"global_step": 220000, "eval_re": [112.95219841741084, 123.04328191819538, 
89.83152688993526, 131.2667348808233, 394.9004901615796, 125.17428432713729, 
89.18629858496872, 83.91424056142458, 290.04157839786996, 148.27655296811542], 
"eval_len": [22, 24, 18, 26, 80, 24, 18, 17, 57, 28]}

 23%|██▎       | 229999/1000000 [3:47:50<9:33:14, 22.39it/s]global step 230000, trans_decision ep_re 158.37552253308655

{"global_step": 230000, "eval_re": [84.02877170747074, 90.67479479755963, 
108.35231283618552, 101.09664942083735, 117.50661239676171, 136.6736714090457, 
95.75605077030113, 125.84877170334084, 289.2004982406266, 434.617092048736], 
"eval_len": [17, 18, 22, 20, 23, 26, 19, 24, 57, 80]}

 24%|██▍       | 239999/1000000 [3:58:00<9:26:08, 22.37it/s]global step 240000, trans_decision ep_re 254.80461357697504

{"global_step": 240000, "eval_re": [98.24867200442678, 164.94056885555273, 
247.53328473732617, 172.1540540305139, 133.86168795014265, 389.64542443526534, 
342.7615815221804, 137.83394492338473, 468.7753225139077, 392.29159479704987], 
"eval_len": [20, 32, 49, 34, 26, 75, 69, 27, 90, 73]}

 25%|██▍       | 249999/1000000 [4:08:00<9:20:08, 22.32it/s]global step 250000, trans_decision ep_re 164.52991747220298

{"global_step": 250000, "eval_re": [152.9864878274147, 103.0304936413476, 
113.37951308675457, 83.98014396308182, 125.5154189616644, 307.57563562823805, 
125.21701387707162, 102.01727906118468, 435.9075845396749, 95.68960413559738], 
"eval_len": [30, 20, 23, 17, 24, 57, 26, 20, 95, 19]}

 26%|██▌       | 259998/1000000 [4:17:50<9:06:00, 22.59it/s]global step 260000, trans_decision ep_re 164.48238302384584

{"global_step": 260000, "eval_re": [399.20092008008163, 167.10386831145433, 
117.94195435847853, 112.03286482156736, 122.41683294675133, 149.19257063523284, 
265.9516908820922, 106.52171413445431, 96.46314428194968, 107.99826978639616], 
"eval_len": [76, 33, 23, 22, 24, 30, 47, 21, 19, 21]}

 27%|██▋       | 269997/1000000 [4:27:53<9:02:19, 22.43it/s]global step 270000, trans_decision ep_re 149.16575540066447

{"global_step": 270000, "eval_re": [95.70445233513134, 160.33389594747192, 
140.24157270825268, 125.04586834419598, 89.27155159172158, 149.6849446869959, 
146.93347897940222, 319.6316843317397, 162.29306638602705, 102.51703869570639], 
"eval_len": [19, 31, 27, 25, 18, 29, 29, 57, 32, 20]}

 28%|██▊       | 279999/1000000 [4:38:10<8:50:31, 22.62it/s]global step 280000, trans_decision ep_re 158.659227712749

{"global_step": 280000, "eval_re": [100.36523713373789, 83.85642946707111, 
368.76080881743513, 326.90589041389615, 121.79031127224346, 118.36967928994926, 
96.13274916686274, 146.59500162997017, 104.9385792340658, 118.87759070225802], 
"eval_len": [20, 17, 70, 64, 24, 23, 19, 29, 21, 23]}

 29%|██▉       | 289999/1000000 [4:48:10<8:43:31, 22.60it/s]global step 290000, trans_decision ep_re 240.20280668856384

{"global_step": 290000, "eval_re": [300.24087496327854, 313.4571586031414, 
566.0431546554746, 89.0740147079215, 194.7352215311633, 97.1314892129579, 
497.51475728221834, 107.05713684314031, 119.61799853240186, 117.15626055394083],
"eval_len": [54, 59, 114, 18, 37, 19, 99, 21, 23, 23]}

 30%|██▉       | 299997/1000000 [4:58:01<8:40:03, 22.43it/s]global step 300000, trans_decision ep_re 166.2945050400414

{"global_step": 300000, "eval_re": [280.49657203721677, 101.16154225333523, 
89.87678418510932, 110.77533084738468, 251.84554624508223, 120.35182702996715, 
357.4906461626099, 101.52102788558523, 154.08407715312407, 95.34169660099934], 
"eval_len": [55, 20, 18, 22, 52, 24, 73, 20, 30, 19]}

 31%|███       | 309999/1000000 [5:08:20<8:26:52, 22.69it/s]global step 310000, trans_decision ep_re 124.50534071003241

{"global_step": 310000, "eval_re": [137.37974923846173, 106.26430525947737, 
96.74470053503792, 130.85284333101433, 89.66948105358995, 146.5873610457408, 
102.9361776419365, 196.4556361630212, 115.96723304801876, 122.19591978402563], 
"eval_len": [28, 21, 19, 25, 18, 29, 20, 37, 23, 24]}

 32%|███▏      | 319999/1000000 [5:18:20<8:23:27, 22.51it/s]global step 320000, trans_decision ep_re 195.14283764133364

{"global_step": 320000, "eval_re": [148.0549035412644, 146.10905684139226, 
238.31191083157063, 144.49602964167326, 169.29454673140913, 164.1459363235502, 
106.21104031272039, 319.9725580842419, 111.98246331607866, 402.8499307894359], 
"eval_len": [29, 28, 48, 28, 33, 34, 21, 62, 24, 77]}

 33%|███▎      | 329997/1000000 [5:28:10<8:21:14, 22.28it/s]global step 330000, trans_decision ep_re 151.4118239139929

{"global_step": 330000, "eval_re": [185.84090966734735, 106.71770500034911, 
122.95589144866165, 138.54206567414184, 90.11388264919317, 129.0265727518895, 
89.56292134779072, 178.2552540286296, 348.7328158246675, 124.37022074725844], 
"eval_len": [36, 21, 24, 27, 18, 25, 18, 34, 67, 24]}

 34%|███▍      | 339999/1000000 [5:38:12<8:12:30, 22.33it/s]global step 340000, trans_decision ep_re 162.0742220467153

{"global_step": 340000, "eval_re": [99.65046625947336, 112.72031881686517, 
173.34961519819805, 364.6882760283331, 320.2012510478053, 83.99890807605836, 
89.50097267660973, 136.5785662883255, 119.28210859583874, 120.77173747964584], 
"eval_len": [20, 22, 34, 67, 59, 17, 18, 26, 24, 23]}

 35%|███▍      | 349999/1000000 [5:48:30<8:02:27, 22.45it/s]global step 350000, trans_decision ep_re 143.60502455109736

{"global_step": 350000, "eval_re": [117.76650813004228, 101.46997709738069, 
120.71365293348964, 122.51371003138965, 112.6668550405047, 127.15462310961131, 
83.94240503631863, 156.4121038588762, 94.97758673718003, 398.4328235361803], 
"eval_len": [23, 20, 24, 24, 22, 25, 17, 30, 19, 72]}

 36%|███▌      | 359999/1000000 [5:58:30<7:57:27, 22.34it/s]global step 360000, trans_decision ep_re 139.3726126338044

{"global_step": 360000, "eval_re": [112.87564704857566, 119.47213909468675, 
101.47302420793817, 84.06777470936134, 300.19431774159824, 101.57971682039927, 
156.5593605691275, 147.28580718829872, 112.60577520548179, 157.61256375257696], 
"eval_len": [22, 23, 20, 17, 58, 20, 30, 28, 22, 33]}

 37%|███▋      | 369998/1000000 [6:08:20<7:42:22, 22.71it/s]global step 370000, trans_decision ep_re 136.72858537240694

{"global_step": 370000, "eval_re": [100.18283361949631, 156.67126989489446, 
88.88189561137374, 96.20001662891411, 149.64550143982754, 128.92323765471917, 
107.20163096804924, 122.25641021868508, 326.9674928987304, 90.3555647893792], 
"eval_len": [20, 31, 18, 19, 29, 26, 21, 24, 66, 18]}

 38%|███▊      | 379997/1000000 [6:18:40<7:39:43, 22.48it/s]global step 380000, trans_decision ep_re 325.5526308267308

{"global_step": 380000, "eval_re": [303.0257036037596, 316.57635159910643, 
406.2122136438178, 425.86349988119696, 528.2352144488013, 116.61673358116047, 
345.9307949342283, 89.03652486952484, 430.06602722635125, 293.96324447936115], 
"eval_len": [60, 60, 77, 78, 101, 23, 65, 18, 82, 55]}

 39%|███▉      | 389999/1000000 [6:28:40<7:31:04, 22.54it/s]global step 390000, trans_decision ep_re 147.6721738174111

{"global_step": 390000, "eval_re": [113.686321590108, 183.87006642752033, 
123.5670121934932, 106.76950750441816, 165.4248890025562, 89.02140447281937, 
123.57056161583844, 145.77392383214155, 113.22458762171628, 311.81346391349956],
"eval_len": [22, 36, 24, 21, 33, 18, 24, 28, 22, 57]}

 40%|███▉      | 399999/1000000 [6:38:31<7:23:32, 22.55it/s]global step 400000, trans_decision ep_re 121.3146349718879

{"global_step": 400000, "eval_re": [127.55283735870188, 124.56496905785227, 
84.00926211843874, 116.87899468311757, 96.25531279571382, 95.93292221357994, 
213.10385218014898, 107.97011707491897, 130.42397057178903, 116.45411166461805],
"eval_len": [25, 24, 17, 23, 19, 19, 41, 21, 25, 23]}

 41%|████      | 409999/1000000 [6:48:50<7:22:10, 22.24it/s]global step 410000, trans_decision ep_re 145.0721274873768

{"global_step": 410000, "eval_re": [125.70021774021586, 172.2711385710216, 
117.26925572383831, 102.00257566877349, 246.7550658881412, 118.8912710008556, 
165.48885053181021, 104.31681563436601, 167.4308091767415, 130.59527493800408], 
"eval_len": [24, 33, 23, 20, 46, 23, 32, 21, 32, 26]}

 42%|████▏     | 419999/1000000 [6:58:50<7:12:27, 22.35it/s]global step 420000, trans_decision ep_re 110.44782793536744

{"global_step": 420000, "eval_re": [117.42235670293897, 124.51346415737794, 
95.48024186793168, 166.00944824018097, 84.00307957757363, 90.77704440665813, 
83.982565972101, 116.63988477740692, 136.2054101978728, 89.44478345363247], 
"eval_len": [23, 24, 19, 32, 17, 18, 17, 23, 27, 18]}

 43%|████▎     | 429998/1000000 [7:08:40<7:01:13, 22.55it/s]global step 430000, trans_decision ep_re 165.21757433190587

{"global_step": 430000, "eval_re": [151.36969894452673, 106.07729742504519, 
95.45914715438556, 119.5443079450419, 109.36308597542114, 178.6001717563039, 
338.9455976568162, 102.99605433422067, 330.72408061748075, 119.09630150981653], 
"eval_len": [29, 21, 19, 23, 21, 35, 61, 21, 62, 23]}

 44%|████▍     | 439997/1000000 [7:19:00<6:53:38, 22.56it/s]global step 440000, trans_decision ep_re 144.40039663054253

{"global_step": 440000, "eval_re": [168.8784112211633, 131.54744242456604, 
143.78178381122902, 318.70305482052123, 89.61323584298587, 89.45038906039781, 
96.21214127832279, 165.36887281293625, 143.64064090998932, 96.8079941233138], 
"eval_len": [33, 25, 28, 61, 18, 18, 19, 32, 28, 19]}

 45%|████▍     | 449999/1000000 [7:29:00<6:48:49, 22.42it/s]global step 450000, trans_decision ep_re 152.29241311185768

{"global_step": 450000, "eval_re": [96.0113520115685, 113.57487109867448, 
177.11794262815513, 90.39255421663732, 143.20324249410677, 89.81298792551236, 
434.5140763787501, 96.60722414469531, 120.37829236944444, 161.31158785103224], 
"eval_len": [19, 22, 34, 18, 28, 18, 80, 19, 23, 32]}

 46%|████▌     | 459999/1000000 [7:39:00<6:41:46, 22.40it/s]global step 460000, trans_decision ep_re 164.90945976553257

{"global_step": 460000, "eval_re": [96.29595325667476, 95.37274697566743, 
322.32910652478023, 101.14639344027759, 105.97761040237913, 469.9991400675832, 
140.26558314910167, 88.87685828839811, 95.63121397404731, 133.19999157641627], 
"eval_len": [19, 19, 62, 20, 21, 88, 27, 18, 19, 26]}

 47%|████▋     | 469999/1000000 [7:49:10<6:33:08, 22.47it/s]global step 470000, trans_decision ep_re 222.1751922080163

{"global_step": 470000, "eval_re": [122.43824306755704, 89.47994094974976, 
328.306773779615, 265.87119608311514, 274.86431770271287, 88.87863787247913, 
479.2034161880888, 346.69664864517483, 119.03344150806117, 106.97930628360969], 
"eval_len": [26, 18, 63, 50, 55, 18, 89, 67, 23, 21]}

 48%|████▊     | 479999/1000000 [7:59:10<6:23:01, 22.63it/s]global step 480000, trans_decision ep_re 188.80769245342898

{"global_step": 480000, "eval_re": [280.71538733208865, 97.36357928578315, 
150.61804775315227, 108.72051423893768, 113.09206369853736, 332.8035060412599, 
118.66958527919566, 224.90718377473976, 107.1600555453583, 354.02700158523726], 
"eval_len": [55, 19, 30, 21, 22, 63, 23, 43, 21, 68]}

 49%|████▉     | 489997/1000000 [8:09:01<6:17:34, 22.51it/s]global step 490000, trans_decision ep_re 144.7714192119357

{"global_step": 490000, "eval_re": [90.76138597185751, 118.5553938761833, 
107.68420691772546, 156.20092653229622, 118.98945889451862, 130.63289224040085, 
89.11789637419908, 106.47193455999907, 95.27548576253736, 434.0246109896395], 
"eval_len": [18, 23, 21, 30, 24, 25, 18, 21, 19, 81]}

 50%|████▉     | 499999/1000000 [8:19:20<6:11:21, 22.44it/s]global step 500000, trans_decision ep_re 145.74108874602894

{"global_step": 500000, "eval_re": [296.15268174298683, 90.06079923595829, 
170.0382275651906, 101.633050559926, 102.59333970848552, 123.72107778853623, 
144.85949117236902, 160.3566860865046, 129.18949936116954, 138.80603423916278], 
"eval_len": [61, 18, 33, 20, 20, 24, 28, 31, 25, 27]}

 51%|█████     | 509999/1000000 [8:29:20<6:06:57, 22.26it/s]global step 510000, trans_decision ep_re 194.69882257353007

{"global_step": 510000, "eval_re": [112.33461232124728, 475.3432897188255, 
88.98084976839895, 605.3768927900288, 102.52988009482372, 107.4827023822759, 
101.72067341275996, 101.47328702975067, 119.0451984416136, 132.70083977557644], 
"eval_len": [22, 100, 18, 130, 20, 21, 20, 20, 23, 26]}

 52%|█████▏    | 519999/1000000 [8:39:20<5:57:45, 22.36it/s]global step 520000, trans_decision ep_re 139.0944836051581

{"global_step": 520000, "eval_re": [124.81624152612694, 89.36933089849587, 
360.3288000469831, 135.68673991632465, 100.21931313266063, 101.33989208059938, 
135.25358630638786, 120.65014219461412, 133.75031194684655, 89.53047800254187], 
"eval_len": [24, 18, 70, 26, 20, 20, 26, 23, 26, 18]}

 53%|█████▎    | 529999/1000000 [8:49:30<5:49:46, 22.40it/s]global step 530000, trans_decision ep_re 153.21755679555486

{"global_step": 530000, "eval_re": [300.6564857342912, 139.86189291545654, 
176.85693397533055, 101.89207759483824, 120.89269641169628, 155.61425904489582, 
175.94493205804378, 136.47114377494313, 127.41145560398078, 96.57369084207224], 
"eval_len": [55, 27, 34, 20, 23, 30, 34, 27, 25, 19]}

 54%|█████▍    | 539999/1000000 [8:59:30<5:40:46, 22.50it/s]global step 540000, trans_decision ep_re 147.84283171990668

{"global_step": 540000, "eval_re": [138.71578445309405, 100.77779134982234, 
155.2415640333256, 89.75708059845083, 198.05354028041006, 136.09961913475868, 
144.0346625623648, 90.71271899805437, 329.3577621803102, 95.67779360847618], 
"eval_len": [27, 20, 30, 18, 38, 26, 28, 18, 71, 19]}

 55%|█████▍    | 549998/1000000 [9:09:22<5:29:53, 22.74it/s]global step 550000, trans_decision ep_re 195.21458217154128

{"global_step": 550000, "eval_re": [311.8918156384349, 158.78527584807657, 
329.160583554387, 88.93164886941025, 327.97843728737, 175.59490498409878, 
148.54501742012027, 96.90687590023119, 102.09006724303178, 212.26119497025178], 
"eval_len": [57, 31, 59, 18, 69, 34, 29, 19, 20, 43]}

 56%|█████▌    | 559999/1000000 [9:19:40<5:26:49, 22.44it/s]global step 560000, trans_decision ep_re 164.68488956453783

{"global_step": 560000, "eval_re": [88.8746737282301, 139.62843581988875, 
108.14168083836114, 108.50574361623968, 326.08575302068664, 123.45373484253504, 
83.92546903146575, 132.899329735348, 122.14456406413855, 413.18951094848444], 
"eval_len": [18, 27, 21, 21, 63, 24, 17, 26, 24, 79]}

 57%|█████▋    | 569999/1000000 [9:29:33<5:20:17, 22.38it/s]global step 570000, trans_decision ep_re 122.97542647322891

{"global_step": 570000, "eval_re": [128.36012818546786, 214.7530851884954, 
119.24365544121791, 95.70475154951899, 131.06414850072505, 90.84107254723304, 
130.24744098746604, 109.16529502749863, 101.66272899653886, 108.71195830812738],
"eval_len": [25, 41, 23, 19, 25, 18, 26, 21, 20, 21]}

 58%|█████▊    | 579999/1000000 [9:39:50<5:11:07, 22.50it/s]global step 580000, trans_decision ep_re 175.74436814441813

{"global_step": 580000, "eval_re": [228.99266631516448, 101.06396640300999, 
171.00646963568752, 216.87710756921652, 118.60635040217831, 95.95325538403208, 
197.7808852267495, 111.18020426962084, 419.5616685773102, 96.4211076612117], 
"eval_len": [43, 20, 34, 41, 23, 19, 38, 22, 80, 19]}

 59%|█████▉    | 589999/1000000 [9:49:50<5:07:21, 22.23it/s]global step 590000, trans_decision ep_re 131.1580028456479

{"global_step": 590000, "eval_re": [148.17576794601607, 118.93027518405026, 
149.98585106743886, 145.87173597526262, 108.50344290552435, 84.11610043514949, 
88.90105942005566, 270.6861439764967, 106.82203301071631, 89.5876185357689], 
"eval_len": [30, 23, 29, 28, 21, 17, 18, 53, 21, 18]}

 60%|█████▉    | 599999/1000000 [9:59:50<4:57:59, 22.37it/s]global step 600000, trans_decision ep_re 191.42990560464756

{"global_step": 600000, "eval_re": [95.89561199578654, 94.12351786749181, 
210.0877825199475, 102.32579839572429, 200.9332512344249, 140.5221546643309, 
272.28590722365317, 118.44282144195772, 299.3300857671107, 380.352124936048], 
"eval_len": [19, 19, 43, 20, 39, 28, 51, 23, 58, 72]}

 61%|██████    | 609999/1000000 [10:09:42<4:49:53, 22.42it/s]global step 610000, trans_decision ep_re 181.86462346299976

{"global_step": 610000, "eval_re": [140.39484880757675, 141.34715556999834, 
462.17927288252133, 121.34890811572726, 352.2856140512776, 83.9936647352555, 
125.73724959905502, 135.13996879025424, 125.67553486045594, 130.5440172178758], 
"eval_len": [27, 27, 94, 24, 65, 17, 24, 26, 24, 26]}

 62%|██████▏   | 619999/1000000 [10:20:00<4:40:02, 22.62it/s]global step 620000, trans_decision ep_re 188.36447312584107

{"global_step": 620000, "eval_re": [141.8497848940752, 160.20822124935748, 
96.64289582259647, 742.8223826377476, 104.12330803646859, 137.49907393794928, 
162.94545874134369, 163.4722415436632, 90.06139923320431, 84.01996516200494], 
"eval_len": [27, 31, 19, 150, 21, 27, 31, 31, 18, 17]}

 63%|██████▎   | 629999/1000000 [10:30:00<4:34:48, 22.44it/s]global step 630000, trans_decision ep_re 149.06274207214406

{"global_step": 630000, "eval_re": [103.09332417364234, 203.96696709423196, 
120.22129516647544, 128.53965690757445, 164.762579934627, 89.85956357591598, 
128.0722848291173, 355.8822735386129, 106.47549100664087, 89.75398449460238], 
"eval_len": [20, 40, 24, 25, 32, 18, 25, 68, 21, 18]}

 64%|██████▍   | 639997/1000000 [10:39:51<4:26:59, 22.47it/s]global step 640000, trans_decision ep_re 181.15063790914456

{"global_step": 640000, "eval_re": [280.3994338650629, 378.9725283825457, 
101.89443630013952, 124.20669755915556, 95.2886000879018, 95.04262464971208, 
356.1677395782512, 90.44534441448864, 156.61626777322013, 132.47270648096784], 
"eval_len": [55, 75, 20, 25, 19, 19, 70, 18, 30, 26]}

 65%|██████▍   | 649999/1000000 [10:50:10<4:20:03, 22.43it/s]global step 650000, trans_decision ep_re 159.25268236789717

{"global_step": 650000, "eval_re": [261.7546002331339, 131.93252356507156, 
164.0061792911071, 162.93472624745894, 139.50614849580032, 106.25091939315823, 
138.81548043443377, 102.91976433622906, 288.89003105125687, 95.51645063132185], 
"eval_len": [50, 25, 32, 32, 27, 21, 27, 20, 55, 19]}

 66%|██████▌   | 659999/1000000 [11:00:10<4:10:45, 22.60it/s]global step 660000, trans_decision ep_re 172.78340353015165

{"global_step": 660000, "eval_re": [125.35364026648003, 310.81233324138736, 
447.4508445562553, 96.30233651974802, 220.0283057914062, 88.91466546818972, 
96.65633414313147, 102.21679139396966, 138.93126173897613, 101.1675221819724], 
"eval_len": [24, 67, 88, 19, 45, 18, 19, 20, 28, 20]}

 67%|██████▋   | 669998/1000000 [11:10:01<4:03:50, 22.56it/s]global step 670000, trans_decision ep_re 119.19669133325286

{"global_step": 670000, "eval_re": [83.99013973451883, 113.41135483259988, 
124.69637157840262, 284.73455562295896, 100.74692316272647, 101.7924053597443, 
88.86493331458561, 108.1425917403961, 95.60935253653113, 89.97828545006445], 
"eval_len": [17, 22, 25, 55, 20, 20, 18, 21, 19, 18]}

 68%|██████▊   | 679997/1000000 [11:20:20<3:58:50, 22.33it/s]global step 680000, trans_decision ep_re 228.8921214822795

{"global_step": 680000, "eval_re": [241.28888061406414, 240.55537905842786, 
90.13221959802925, 95.69924363062299, 403.084653245792, 122.71101149349185, 
409.16571755403623, 410.27696381244465, 171.46014742435756, 104.54699839152839],
"eval_len": [46, 47, 18, 19, 74, 24, 88, 79, 33, 21]}

 69%|██████▉   | 689999/1000000 [11:30:20<3:49:32, 22.51it/s]global step 690000, trans_decision ep_re 151.77954675247472

{"global_step": 690000, "eval_re": [139.6777038832585, 107.60341339602287, 
122.3468681943312, 89.05412194883492, 133.75242206585327, 143.23146853065512, 
89.8111524194448, 436.3521865647864, 130.70605158943474, 125.26007893212535], 
"eval_len": [27, 21, 24, 18, 26, 29, 18, 82, 26, 24]}

 70%|██████▉   | 699999/1000000 [11:40:11<3:41:14, 22.60it/s]global step 700000, trans_decision ep_re 155.59578149776092

{"global_step": 700000, "eval_re": [83.92334794584785, 100.72384406051067, 
171.28404070899617, 285.45825895750164, 159.12213454923355, 339.4495877122161, 
95.50456835791418, 96.43802164583387, 90.24192413576031, 133.81208690379503], 
"eval_len": [17, 20, 33, 64, 31, 62, 19, 19, 18, 26]}

 71%|███████   | 709998/1000000 [11:50:30<3:32:04, 22.79it/s]global step 710000, trans_decision ep_re 115.9338677838158

{"global_step": 710000, "eval_re": [83.87461896994695, 139.0691508428845, 
101.48871523370198, 100.56219828627265, 108.87379206994758, 193.56090348084254, 
90.07624421472167, 114.04112967136686, 106.89591781981275, 120.89600724866051], 
"eval_len": [17, 27, 20, 20, 21, 38, 18, 22, 21, 24]}

 72%|███████▏  | 719999/1000000 [12:00:30<3:27:11, 22.52it/s]global step 720000, trans_decision ep_re 165.34760152198893

{"global_step": 720000, "eval_re": [143.77024795611405, 113.36632300279783, 
248.0071210242644, 119.36178250393564, 225.92324073367587, 289.4754761319569, 
152.4023840287106, 144.9606261099934, 114.55819450331649, 101.65061922512433], 
"eval_len": [29, 22, 46, 24, 46, 55, 29, 28, 22, 20]}

 73%|███████▎  | 729999/1000000 [12:10:20<3:19:51, 22.52it/s]global step 730000, trans_decision ep_re 182.13734625696281

{"global_step": 730000, "eval_re": [116.0843697592767, 337.9270362890401, 
318.37199941768836, 83.86479952241703, 101.16968645059389, 127.76968047081593, 
113.39126328582134, 373.1231462922034, 99.27591945903916, 150.39556162273215], 
"eval_len": [23, 65, 60, 17, 20, 25, 22, 75, 20, 29]}

 74%|███████▍  | 739998/1000000 [12:20:40<3:10:37, 22.73it/s]global step 740000, trans_decision ep_re 164.82570072676282

{"global_step": 740000, "eval_re": [156.0488898952776, 102.56362580334462, 
460.0528832628062, 151.90261049940503, 113.33157081997606, 119.17117499976783, 
147.437109263654, 88.9537633585471, 127.61625903769328, 181.17912032715657], 
"eval_len": [30, 20, 92, 30, 22, 23, 28, 18, 25, 35]}

 75%|███████▍  | 749999/1000000 [12:30:40<3:06:01, 22.40it/s]global step 750000, trans_decision ep_re 169.53353950923008

{"global_step": 750000, "eval_re": [89.48311384879652, 133.44962194521813, 
95.15520603159513, 138.27652428272611, 402.81926055900885, 124.19376987867479, 
147.65760565993324, 343.24119479067355, 102.00295704276101, 119.05614105291362],
"eval_len": [18, 26, 19, 27, 76, 24, 28, 68, 20, 23]}

 76%|███████▌  | 759997/1000000 [12:40:32<2:58:34, 22.40it/s]global step 760000, trans_decision ep_re 204.70808833126495

{"global_step": 760000, "eval_re": [146.41159553332523, 126.74706361328958, 
84.02659485874192, 140.54560164568375, 338.10782365292397, 273.55807183641275, 
84.05802471205992, 102.5226693948651, 572.873395629514, 178.23004243583324], 
"eval_len": [28, 25, 17, 27, 71, 57, 17, 20, 110, 34]}

 77%|███████▋  | 769999/1000000 [12:50:50<2:51:02, 22.41it/s]global step 770000, trans_decision ep_re 122.63484603678418

{"global_step": 770000, "eval_re": [90.1566769712135, 335.1162812909436, 
111.85466935568071, 88.78351595739453, 95.29825953269373, 95.8906190337834, 
135.35028362706993, 89.65772496966859, 84.00621814722471, 100.23421148216934], 
"eval_len": [18, 72, 22, 18, 19, 19, 27, 18, 17, 20]}

 78%|███████▊  | 779999/1000000 [13:00:50<2:42:33, 22.56it/s]global step 780000, trans_decision ep_re 157.66637883184154

{"global_step": 780000, "eval_re": [342.7372614497294, 96.14498929168509, 
108.07638375205954, 347.0580156096343, 150.83058508900092, 100.3189837793569, 
103.05514372734096, 148.57523652442399, 89.80230458530133, 90.06488450988283], 
"eval_len": [63, 19, 21, 65, 29, 20, 20, 28, 18, 18]}

 79%|███████▉  | 789997/1000000 [13:10:42<2:35:46, 22.47it/s]global step 790000, trans_decision ep_re 188.43618523014567

{"global_step": 790000, "eval_re": [119.00419185632775, 136.90752621887756, 
102.22336045304941, 363.35071845661287, 88.92279074725045, 105.52049750503761, 
314.86561123478987, 371.3054969261881, 132.33629765110996, 149.9253612522127], 
"eval_len": [23, 27, 20, 70, 18, 21, 65, 70, 26, 32]}

 80%|███████▉  | 799999/1000000 [13:21:00<2:28:05, 22.51it/s]global step 800000, trans_decision ep_re 124.56050670381171

{"global_step": 800000, "eval_re": [134.05547809433455, 84.00553737042517, 
136.76663467865197, 108.06984094005395, 149.85652334386867, 106.04071903694222, 
108.31659191483853, 133.04761621757262, 117.94017006337789, 167.50595537805157],
"eval_len": [26, 17, 26, 21, 30, 21, 21, 26, 23, 33]}

 81%|████████  | 809999/1000000 [13:31:00<2:20:17, 22.57it/s]global step 810000, trans_decision ep_re 167.77761124408235

{"global_step": 810000, "eval_re": [105.41582558871673, 310.86305314883634, 
110.48162500631871, 114.41922643448359, 123.09785025360858, 198.1334073186686, 
122.5096765783367, 156.7798822631243, 89.80575102005906, 346.2698148286709], 
"eval_len": [21, 67, 22, 22, 24, 39, 24, 31, 18, 64]}

 82%|████████▏ | 819999/1000000 [13:41:10<2:13:43, 22.43it/s]global step 820000, trans_decision ep_re 100.1252427848093

{"global_step": 820000, "eval_re": [89.75993944315886, 128.59442860620888, 
108.22936828060085, 83.80277988482837, 90.3709617864351, 90.31876731822794, 
90.15668686137543, 147.10835269145355, 88.90921374377557, 84.00192923202843], 
"eval_len": [18, 25, 21, 17, 18, 18, 18, 28, 18, 17]}

 83%|████████▎ | 829999/1000000 [13:51:10<2:07:20, 22.25it/s]global step 830000, trans_decision ep_re 146.14564988773495

{"global_step": 830000, "eval_re": [89.34164212948815, 219.92266394426042, 
90.0936632933716, 438.4084229326648, 83.92711910027629, 121.04397473358746, 
89.85654621534864, 122.88793607740347, 116.42606718082072, 89.54846327012818], 
"eval_len": [18, 41, 18, 83, 17, 24, 18, 24, 23, 18]}

 84%|████████▍ | 839998/1000000 [14:01:02<1:58:32, 22.50it/s]global step 840000, trans_decision ep_re 179.9078496573193

{"global_step": 840000, "eval_re": [139.92925764087508, 100.73791211562798, 
333.4572147749705, 157.97022298695066, 152.1523169296525, 406.6631253422895, 
89.01103005440886, 163.23972414245821, 136.94608869603923, 118.97160388992044], 
"eval_len": [27, 20, 61, 30, 29, 77, 18, 31, 26, 23]}

 85%|████████▍ | 849999/1000000 [14:11:20<1:51:37, 22.40it/s]global step 850000, trans_decision ep_re 186.59843262456712

{"global_step": 850000, "eval_re": [176.08691788940428, 96.24794691193966, 
328.1674108205159, 83.96193646064592, 94.7849885247238, 156.71388839113595, 
111.61856749690598, 363.84907890377053, 124.2037480391773, 330.34984280745203], 
"eval_len": [34, 19, 63, 17, 19, 30, 22, 71, 24, 62]}

 86%|████████▌ | 859999/1000000 [14:21:20<1:44:08, 22.41it/s]global step 860000, trans_decision ep_re 152.41644349333708

{"global_step": 860000, "eval_re": [281.70758125864967, 112.88058636100965, 
122.05931866154297, 158.50718226340683, 117.64894175730817, 114.06787189946422, 
95.89197598692908, 294.3398971596979, 111.8305677420351, 115.23051184332705], 
"eval_len": [56, 22, 24, 30, 23, 23, 19, 61, 22, 23]}

 87%|████████▋ | 869997/1000000 [14:31:13<1:36:13, 22.52it/s]global step 870000, trans_decision ep_re 205.36276071724956

{"global_step": 870000, "eval_re": [427.448417768888, 107.03737765122523, 
89.56155281478576, 386.0889411890815, 89.02955574274996, 107.10563674701693, 
490.6087653191213, 134.47519170164634, 89.20854581931144, 133.06362241866893], 
"eval_len": [77, 21, 18, 69, 18, 21, 90, 27, 18, 26]}

 88%|████████▊ | 879999/1000000 [14:41:30<1:29:07, 22.44it/s]global step 880000, trans_decision ep_re 154.9919561292497

{"global_step": 880000, "eval_re": [95.55657290280932, 457.16392026702437, 
148.1603682065167, 124.45771516252594, 147.65236590635214, 127.8383944345075, 
101.16354692495771, 111.00160076084119, 132.5253565919057, 104.3997201350565], 
"eval_len": [19, 96, 29, 24, 28, 25, 20, 22, 26, 21]}

 89%|████████▉ | 889997/1000000 [14:51:21<1:21:33, 22.48it/s]global step 890000, trans_decision ep_re 155.46387559093165

{"global_step": 890000, "eval_re": [359.2684709945254, 263.44981319091454, 
118.88357536988515, 89.81101440692979, 142.136144850113, 121.25126674251726, 
83.88415360908196, 123.94641712897383, 124.2420281621583, 127.76587145421747], 
"eval_len": [68, 49, 23, 18, 28, 24, 17, 24, 24, 26]}

 90%|████████▉ | 899999/1000000 [15:01:41<1:14:04, 22.50it/s]global step 900000, trans_decision ep_re 120.26779881532104

{"global_step": 900000, "eval_re": [167.06644061734156, 152.46061024760675, 
141.40539311875582, 95.97164952096647, 95.50870872079138, 88.8750959002639, 
143.8016501739597, 144.38002783172254, 89.27801260959302, 83.93039941220954], 
"eval_len": [32, 29, 27, 19, 19, 18, 28, 28, 18, 17]}

 91%|█████████ | 909999/1000000 [15:11:41<1:07:56, 22.08it/s]global step 910000, trans_decision ep_re 240.36537474533824

{"global_step": 910000, "eval_re": [95.62487587912018, 496.46734613106145, 
84.06998750550973, 512.3201178378237, 410.86303873679265, 146.11736849394165, 
164.39863139346423, 100.74152951677434, 290.5954703745974, 102.45538158429738], 
"eval_len": [19, 99, 17, 103, 77, 28, 31, 20, 65, 20]}

 92%|█████████▏| 919999/1000000 [15:21:51<1:00:00, 22.22it/s]global step 920000, trans_decision ep_re 157.63216660258098

{"global_step": 920000, "eval_re": [117.55403875125842, 176.85510367063245, 
135.5572425611283, 274.76670090788457, 326.45182232307684, 90.84260131903443, 
83.97080612731818, 122.54031167470399, 135.237742328785, 112.54529636198728], 
"eval_len": [23, 33, 26, 51, 59, 18, 17, 24, 26, 22]}

 93%|█████████▎| 929999/1000000 [15:31:41<53:02, 22.00it/s]global step 930000, trans_decision ep_re 95.93059593052644

{"global_step": 930000, "eval_re": [95.73608097007681, 133.61796066164243, 
125.71165404982465, 89.99651003477528, 84.04317136641605, 88.86603236923487, 
83.91323506101062, 84.0856721263732, 83.92674963588905, 89.40889303002122], 
"eval_len": [19, 26, 24, 18, 17, 18, 17, 17, 17, 18]}

 94%|█████████▍| 939999/1000000 [15:42:01<44:43, 22.36it/s]global step 940000, trans_decision ep_re 176.5481349843923

{"global_step": 940000, "eval_re": [820.4864965200792, 90.08745547502404, 
99.513543784034, 102.72552914336787, 113.13916266676388, 96.00752980500005, 
114.0955423184615, 126.45175396321777, 112.88828385472617, 90.08605231324876], 
"eval_len": [167, 18, 20, 20, 22, 19, 22, 25, 22, 18]}

 95%|█████████▍| 949999/1000000 [15:52:01<37:15, 22.37it/s]global step 950000, trans_decision ep_re 126.38179038710389

{"global_step": 950000, "eval_re": [106.21737548151773, 104.80269899340927, 
180.45681237320053, 123.10472888177434, 95.96851255007182, 108.72316999447902, 
90.52239311232827, 177.52207549030533, 180.03558159783046, 96.46455539612207], 
"eval_len": [21, 21, 34, 25, 19, 21, 18, 35, 34, 19]}

 96%|█████████▌| 959999/1000000 [16:02:11<29:42, 22.43it/s]global step 960000, trans_decision ep_re 109.16061185472873

{"global_step": 960000, "eval_re": [111.69221492891594, 112.87152530447572, 
100.49115877848031, 95.3303103714422, 136.31076188555377, 94.77534863038113, 
102.56102543241683, 115.49436963393971, 96.17991345821986, 125.89949012346179], 
"eval_len": [22, 22, 20, 19, 26, 19, 20, 23, 19, 25]}

 97%|█████████▋| 969999/1000000 [16:12:01<22:14, 22.48it/s]global step 970000, trans_decision ep_re 133.78227067123288

{"global_step": 970000, "eval_re": [202.9391453848089, 137.2174451967701, 
120.80912468337849, 118.92144929186766, 144.25329811316277, 95.79029830569324, 
132.59721121332143, 89.33529597474057, 142.96926530685445, 152.99017324173118], 
"eval_len": [38, 28, 25, 23, 28, 19, 26, 18, 27, 30]}

 98%|█████████▊| 979999/1000000 [16:22:21<14:58, 22.26it/s]global step 980000, trans_decision ep_re 172.62183347278943

{"global_step": 980000, "eval_re": [117.00927842716702, 106.37025141866616, 
380.4642285747295, 133.865116096976, 137.8967334514873, 114.39702576756532, 
117.68121264149148, 148.5485173946011, 351.4016877441805, 118.58428321102993], 
"eval_len": [23, 21, 69, 26, 27, 22, 23, 29, 65, 23]}

 99%|█████████▉| 989999/1000000 [16:32:31<07:30, 22.22it/s]global step 990000, trans_decision ep_re 155.37431087863678

{"global_step": 990000, "eval_re": [89.96644547387102, 142.14046240728314, 
338.8389743045909, 111.91786457255046, 89.53745793523744, 337.50414972366707, 
132.58288005096398, 120.46306474438119, 106.89114233654352, 83.90066723727932], 
"eval_len": [18, 30, 64, 22, 18, 64, 26, 24, 21, 17]}

100%|█████████▉| 999999/1000000 [16:42:41<00:00, 22.05it/s]global step 1000000, trans_decision ep_re 165.50722502263642

{"global_step": 1000000, "eval_re": [417.51843293649694, 128.73670346901227, 
90.76942952560393, 96.41188870799755, 357.394234586805, 165.27202628526874, 
89.3782286116818, 89.45610044062781, 130.16362832270744, 89.97157734016298], 
"eval_len": [74, 25, 18, 19, 70, 32, 18, 18, 25, 18]}

100%|██████████| 1000000/1000000 [16:42:45<00:00, 16.62it/s]
