
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.2
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [05:40<13:21:14, 20.59it/s]global step 10000, trans_decision ep_re -100.74674319707752

{"global_step": 10000, "eval_re": [-238.76605631573042, -183.14855808952188, 
34.5163076273043, -49.49336552526113, -175.3108120005873, -246.48888740096533, 
-14.118576041485753, -132.41050703884187, -4.846604754934105, 2.59962756924835],
"eval_len": [1000, 1000, 54, 363, 1000, 1000, 107, 1000, 134, 429]}

  2%|▏         | 19999/1000000 [16:50<13:00:59, 20.91it/s]global step 20000, trans_decision ep_re -56.00050489536824

{"global_step": 20000, "eval_re": [-41.12445968350342, 89.5488000451176, 
-97.13733143716948, -7.641723477805605, -16.53055532672133, 6.548213829150263, 
-305.52294909096474, -49.81720954457534, -99.91594363826512, 
-38.41189062894526], "eval_len": [211, 133, 582, 156, 74, 48, 1000, 137, 218, 
416]}

  3%|▎         | 29998/1000000 [28:00<13:03:49, 20.63it/s]global step 30000, trans_decision ep_re -15.946246577343677

{"global_step": 30000, "eval_re": [-57.54623370313546, -49.782888319279074, 
-131.71445287370275, 11.76848854670517, -23.12326713257661, 46.125161607529186, 
65.5457112245549, -15.768097789650547, -4.424503977108902, -0.5423833567726997],
"eval_len": [197, 1000, 1000, 73, 1000, 75, 237, 129, 48, 63]}

  4%|▍         | 39999/1000000 [39:10<13:07:57, 20.31it/s]global step 40000, trans_decision ep_re -9.43484321859725

{"global_step": 40000, "eval_re": [7.481752919515696, -23.062396134292907, 
4.222667794821456, 8.00009268625411, 3.540998087221947, -86.91718578915268, 
-42.03165739497464, -13.359931781418654, 31.055096502940604, 
16.722130923112555], "eval_len": [111, 300, 141, 52, 21, 194, 1000, 133, 51, 
73]}

  5%|▍         | 49998/1000000 [50:10<12:48:31, 20.60it/s]global step 50000, trans_decision ep_re -24.63902843770122

{"global_step": 50000, "eval_re": [-18.73134109523806, 6.0409213219033475, 
0.1580602087295948, -17.2888884313051, -30.943187919120895, -11.10397059313186, 
-2.772244522857479, -38.92233813480351, -154.7823242876404, 21.955029076452135],
"eval_len": [1000, 50, 60, 154, 115, 57, 157, 119, 1000, 53]}

  6%|▌         | 59999/1000000 [1:01:20<12:52:52, 20.27it/s]global step 60000, trans_decision ep_re 11.224251063867126

{"global_step": 60000, "eval_re": [6.687225302086389, 37.98138968652754, 
-5.466009890456315, 18.153712358295703, 21.417513331879697, -16.0440489326213, 
27.419195005816164, -31.518369304148898, 29.183137583909552, 24.42876549738272],
"eval_len": [151, 70, 54, 124, 39, 341, 120, 527, 84, 91]}

  7%|▋         | 69999/1000000 [1:12:20<12:39:02, 20.42it/s]global step 70000, trans_decision ep_re 33.40642013056531

{"global_step": 70000, "eval_re": [110.20354253484146, 6.953655037752307, 
159.72484412768733, 174.40397009131476, 58.505175874805275, 48.99938490815982, 
33.023995160220736, -343.6021761502229, 4.288153643327746, 81.56365607776654], 
"eval_len": [1000, 49, 1000, 1000, 269, 88, 110, 1000, 75, 278]}

  8%|▊         | 79998/1000000 [1:23:30<12:11:26, 20.96it/s]global step 80000, trans_decision ep_re 11.278969387955286

{"global_step": 80000, "eval_re": [-0.12650426388748592, -37.1722767535432, 
32.63301711017488, 20.732244373976336, 48.38299226486931, 46.68229375107366, 
-38.36021227008418, 5.95850226034649, 26.91304732667635, 7.1465900799507], 
"eval_len": [215, 171, 1000, 146, 1000, 68, 1000, 44, 140, 57]}

  9%|▉         | 89998/1000000 [1:34:40<12:30:58, 20.20it/s]global step 90000, trans_decision ep_re 60.745122988056195

{"global_step": 90000, "eval_re": [29.683900682142625, 26.704443741816597, 
38.21845584329291, 77.44763947711904, 96.41886308358973, 11.018927401277558, 
112.20041091548944, 109.9917321585269, 22.826030650302037, 82.94082592700511], 
"eval_len": [34, 473, 1000, 426, 264, 105, 1000, 217, 120, 215]}

 10%|▉         | 99999/1000000 [1:46:10<12:20:53, 20.25it/s]global step 100000, trans_decision ep_re 42.25525510846447

{"global_step": 100000, "eval_re": [51.885029555840035, 96.38704343249245, 
18.850389615486726, 20.33694552580738, -22.760517001006846, 13.164374175684149, 
42.3830846782424, 106.12653154632967, 14.264356719552605, 81.91531283621613], 
"eval_len": [171, 333, 44, 146, 55, 69, 135, 897, 46, 311]}

 11%|█         | 109998/1000000 [1:57:30<12:11:38, 20.27it/s]global step 110000, trans_decision ep_re 99.42979806018623

{"global_step": 110000, "eval_re": [94.8592140222852, 68.32819164309072, 
2.863400376501952, 20.26366125273917, 242.99059825566954, 47.87443246205582, 
114.65811430746515, 292.0946267379569, 79.53619007438222, 30.829551469715483], 
"eval_len": [1000, 221, 190, 97, 1000, 188, 227, 1000, 280, 93]}

 12%|█▏        | 119999/1000000 [2:08:50<12:06:34, 20.19it/s]global step 120000, trans_decision ep_re 117.5968324750775

{"global_step": 120000, "eval_re": [168.73924287507012, 180.32686155549612, 
191.69708859806764, 13.961690406124609, 11.156595438918188, 191.00905757034403, 
25.390799780782707, 82.78737124439023, 267.76961551851315, 43.13000176306833], 
"eval_len": [387, 1000, 1000, 266, 24, 550, 268, 450, 608, 85]}

 13%|█▎        | 129999/1000000 [2:20:10<11:55:33, 20.26it/s]global step 130000, trans_decision ep_re 49.409103364350386

{"global_step": 130000, "eval_re": [52.06439355094491, 7.4371151532780315, 
16.15003895956698, -0.5492854591383507, 80.48061414149323, 114.8608616126878, 
14.943208219957825, 165.683880708761, 39.5068332613352, 3.5133734946171846], 
"eval_len": [1000, 142, 39, 175, 95, 1000, 225, 1000, 60, 14]}

 14%|█▍        | 139998/1000000 [2:31:30<11:49:07, 20.21it/s]global step 140000, trans_decision ep_re 94.28508131637987

{"global_step": 140000, "eval_re": [44.293888189243695, 213.7842662230283, 
40.302028034163214, 216.8134741920548, 85.5422773039278, 97.29349850361598, 
201.388089660934, 16.68109470181532, 18.10750970730648, 8.6446866477093], 
"eval_len": [138, 225, 108, 1000, 463, 1000, 1000, 40, 47, 42]}

 15%|█▍        | 149997/1000000 [2:42:50<11:42:35, 20.16it/s]global step 150000, trans_decision ep_re 84.27039265355029

{"global_step": 150000, "eval_re": [118.18212379825903, 109.42042611448345, 
14.733500952190997, 11.94341964195039, 57.01374461309077, 145.77068933022542, 
36.06391208407314, 136.05207884477898, 106.68256764177602, 106.8414635146747], 
"eval_len": [435, 273, 30, 17, 114, 1000, 66, 521, 312, 368]}

 16%|█▌        | 159999/1000000 [2:54:10<11:35:49, 20.12it/s]global step 160000, trans_decision ep_re 115.30037057917745

{"global_step": 160000, "eval_re": [150.62256472714085, 23.365924594712673, 
54.24297411157049, 230.1236431902102, 35.96552244728865, 12.270762030443219, 
65.88059734012019, 147.087636163377, 345.2066026562779, 88.23747853063337], 
"eval_len": [405, 47, 133, 481, 121, 39, 193, 1000, 1000, 305]}

 17%|█▋        | 169998/1000000 [3:05:20<11:02:26, 20.88it/s]global step 170000, trans_decision ep_re 61.2751342851076

{"global_step": 170000, "eval_re": [50.07660663781794, -46.72453079760702, 
46.11982178323237, -4.1722128205122075, 11.229440610332247, 18.87319565091048, 
273.5562979895235, 106.8010432548057, 139.2137610581361, 17.777919484436847], 
"eval_len": [281, 73, 156, 39, 33, 89, 1000, 1000, 246, 40]}

 18%|█▊        | 179999/1000000 [3:16:30<11:10:55, 20.37it/s]global step 180000, trans_decision ep_re 116.40531391243539

{"global_step": 180000, "eval_re": [142.87374540236118, 327.5999169834814, 
98.09317540325026, 110.88471300575574, 6.246994612962448, 124.1890244624279, 
32.45394096152079, 281.2112566850413, 5.084760110459353, 35.41561149709351], 
"eval_len": [1000, 1000, 1000, 175, 31, 1000, 107, 809, 1000, 98]}

 19%|█▉        | 189998/1000000 [3:27:50<10:44:57, 20.93it/s]global step 190000, trans_decision ep_re 78.3746665876674

{"global_step": 190000, "eval_re": [104.752867285592, 45.41105044041709, 
-3.326995467454333, 11.995879187239561, 79.77595475385506, 78.716544984705, 
34.52561944362366, 100.26443126098401, 106.43392614972225, 225.1973878379898], 
"eval_len": [1000, 127, 50, 53, 206, 1000, 1000, 330, 1000, 1000]}

 20%|█▉        | 199999/1000000 [3:39:10<10:46:57, 20.61it/s]global step 200000, trans_decision ep_re 140.25287221710025

{"global_step": 200000, "eval_re": [10.425360837645114, 166.90115450123497, 
318.5361958657564, 248.7527399443387, 93.78132823766524, 258.27292224741586, 
31.80913093132917, 15.695902570200833, 142.0022890010125, 116.35169803440355], 
"eval_len": [71, 352, 930, 1000, 1000, 1000, 159, 32, 1000, 259]}

 21%|██        | 209999/1000000 [3:50:30<10:42:45, 20.48it/s]global step 210000, trans_decision ep_re 207.13128435269732

{"global_step": 210000, "eval_re": [154.78248936743444, 357.0031846042414, 
192.77443926598505, -5.169368540484486, 226.5752368441932, 149.71009694753351, 
446.60570517117293, 405.6366734530233, 119.17643951641853, 24.217946897455356], 
"eval_len": [339, 728, 1000, 26, 591, 289, 1000, 864, 303, 54]}

 22%|██▏       | 219998/1000000 [4:01:40<10:29:03, 20.67it/s]global step 220000, trans_decision ep_re 90.12701093238763

{"global_step": 220000, "eval_re": [78.4082339687002, 126.16923240556245, 
7.582549700169097, 121.01197480141383, 102.07026963275392, 76.71106259630578, 
200.39168453906757, -5.50120742127161, 41.191226167676426, 153.23508293349866], 
"eval_len": [313, 208, 24, 445, 306, 106, 819, 45, 69, 344]}

 23%|██▎       | 229999/1000000 [4:12:50<10:19:35, 20.71it/s]global step 230000, trans_decision ep_re 69.21945417935878

{"global_step": 230000, "eval_re": [32.91184332102232, 34.22016822513627, 
210.58795353681563, 172.87426992721376, 33.49574304694443, -3.6396926830611633, 
-2.994606798607014, 9.959456506046921, 28.291098603305723, 176.48830810877087], 
"eval_len": [113, 65, 1000, 1000, 96, 77, 34, 31, 148, 569]}

 24%|██▍       | 239999/1000000 [4:23:50<10:16:07, 20.56it/s]global step 240000, trans_decision ep_re 153.4592925503684

{"global_step": 240000, "eval_re": [406.3509740054949, 40.670855710327146, 
117.15522701316263, 9.28787102577712, 87.36347559046199, 175.43654765789378, 
328.44045226253706, 164.096513854174, -5.142031647265282, 210.93304003112047], 
"eval_len": [1000, 126, 351, 60, 376, 637, 645, 1000, 237, 547]}

 25%|██▍       | 249999/1000000 [4:35:00<10:05:38, 20.64it/s]global step 250000, trans_decision ep_re 131.44120597427462

{"global_step": 250000, "eval_re": [38.31677049358542, 178.18692231672918, 
113.77809463577124, 96.57098401590964, 224.39061526609032, 147.60184880085433, 
185.45017723266173, 57.16980494132809, 233.63700753738095, 39.309834502435464], 
"eval_len": [118, 1000, 389, 194, 1000, 427, 1000, 225, 1000, 30]}

 26%|██▌       | 259998/1000000 [4:46:10<9:47:37, 20.99it/s]global step 260000, trans_decision ep_re 94.96597366345412

{"global_step": 260000, "eval_re": [25.605707998634703, 80.25770126456095, 
96.69044533934239, 58.59493027317777, 11.028091446688641, 220.54750278002774, 
91.30021758969202, 126.70663097165686, 133.75092017112397, 105.1775887996362], 
"eval_len": [33, 171, 183, 198, 42, 1000, 203, 449, 1000, 196]}

 27%|██▋       | 269999/1000000 [4:57:20<9:50:41, 20.60it/s]global step 270000, trans_decision ep_re 131.86341524035385

{"global_step": 270000, "eval_re": [60.85446983492439, 192.65218910778069, 
395.46278016918154, 25.45499229182895, 96.31009521722201, 66.68440723722259, 
71.7267234413839, 191.48325299380573, 116.28809150619111, 101.71715060399796], 
"eval_len": [1000, 610, 1000, 48, 139, 101, 394, 514, 310, 301]}

 28%|██▊       | 279999/1000000 [5:08:30<9:39:29, 20.71it/s]global step 280000, trans_decision ep_re 133.97341624188226

{"global_step": 280000, "eval_re": [124.7921133186343, 109.92537602398193, 
123.85555911327525, 23.09339051552783, 152.8627357637939, 285.29885844716625, 
54.10836145693843, 1.8219616672644872, 272.7022081651897, 191.27359794705038], 
"eval_len": [689, 255, 311, 48, 306, 528, 65, 72, 1000, 1000]}

 29%|██▉       | 289999/1000000 [5:19:30<9:30:43, 20.73it/s]global step 290000, trans_decision ep_re 141.98970074659502

{"global_step": 290000, "eval_re": [381.3396078232893, 367.76288088374116, 
70.66198603079366, 196.56906769047717, 38.06119077572457, 46.64808918069206, 
33.75389386011537, 189.76193283413937, 55.15436062635382, 40.183997760623626], 
"eval_len": [1000, 1000, 1000, 1000, 63, 129, 53, 612, 89, 75]}

 30%|██▉       | 299998/1000000 [5:30:40<9:19:36, 20.85it/s]global step 300000, trans_decision ep_re 163.98315939826585

{"global_step": 300000, "eval_re": [602.1779220616407, 98.3888766338151, 
1.2169739175719658, 60.38749959100792, 203.80399465442667, 234.931927712764, 
124.71675082478262, 94.72775375917797, 219.9252883538632, -0.44539352639165763],
"eval_len": [1000, 406, 38, 125, 1000, 1000, 330, 355, 504, 41]}

 31%|███       | 309999/1000000 [5:41:50<9:17:24, 20.63it/s]global step 310000, trans_decision ep_re 83.53507574213492

{"global_step": 310000, "eval_re": [13.574686103387075, 61.051142281979956, 
376.9859915344854, 135.64764303619134, 20.353679384012302, 132.04785040367116, 
10.171466897136524, 4.620867338328088, 18.25888025365329, 62.638550188504205], 
"eval_len": [47, 299, 793, 287, 42, 282, 26, 14, 60, 1000]}

 32%|███▏      | 319998/1000000 [5:52:50<9:03:38, 20.85it/s]global step 320000, trans_decision ep_re 149.7654976209843

{"global_step": 320000, "eval_re": [27.574419650924106, 90.69200746312191, 
338.1499553443476, 90.06055463445333, 322.95187451054863, 150.92852059233934, 
242.9765925026577, 98.33704093347282, 11.785014459897853, 124.19899611807978], 
"eval_len": [49, 125, 793, 214, 1000, 900, 1000, 193, 118, 1000]}

 33%|███▎      | 329998/1000000 [6:04:00<8:50:25, 21.05it/s]global step 330000, trans_decision ep_re 113.51074734050674

{"global_step": 330000, "eval_re": [26.548081473579956, 60.36059456986328, 
119.15389705673616, 129.40605142028406, 100.40311348464574, 78.66576539788979, 
188.93228944641317, 99.50224533195538, 140.76831882260626, 191.36711640109368], 
"eval_len": [20, 187, 165, 251, 178, 145, 1000, 217, 384, 403]}

 34%|███▍      | 339998/1000000 [6:15:00<8:49:23, 20.78it/s]global step 340000, trans_decision ep_re 201.5209393369695

{"global_step": 340000, "eval_re": [472.7578996728771, 50.76253981071505, 
224.88010855294775, 19.091752074277853, 190.02809054819477, 140.85565038518007, 
85.25265804366475, 63.21045424030586, 416.80344161939837, 351.56679842213316], 
"eval_len": [1000, 99, 461, 51, 561, 424, 345, 173, 874, 664]}

 35%|███▍      | 349998/1000000 [6:26:10<8:41:22, 20.78it/s]global step 350000, trans_decision ep_re 84.09199198675962

{"global_step": 350000, "eval_re": [75.8512765576222, 58.45655823410012, 
24.27858211022574, 8.910826163648217, 109.97214113635528, 50.229767411165476, 
160.7202694728928, 243.94668576908884, 81.83760753497964, 26.716205477517867], 
"eval_len": [1000, 179, 123, 73, 160, 161, 1000, 1000, 125, 57]}

 36%|███▌      | 359999/1000000 [6:37:20<8:35:48, 20.68it/s]global step 360000, trans_decision ep_re 234.95715385559055

{"global_step": 360000, "eval_re": [218.06998568016283, 324.04904917568155, 
141.99586173712456, 359.11793138932165, 135.93868266877612, 244.99170071759272, 
372.2251042944787, 140.70582511904612, 169.04025812120463, 243.43713965251683], 
"eval_len": [1000, 879, 421, 617, 340, 504, 717, 323, 1000, 541]}

 37%|███▋      | 369999/1000000 [6:48:30<8:27:38, 20.68it/s]global step 370000, trans_decision ep_re 149.33706024359532

{"global_step": 370000, "eval_re": [122.2968861483616, 190.933488394732, 
22.205142600946882, 153.81732896843022, 272.46894886013376, 339.7868962041829, 
161.05277340216847, 101.24365055611217, -4.171663458933852, 133.73715075981914],
"eval_len": [294, 546, 42, 1000, 648, 1000, 490, 244, 23, 1000]}

 38%|███▊      | 379998/1000000 [6:59:40<8:14:05, 20.91it/s]global step 380000, trans_decision ep_re 105.80731638085472

{"global_step": 380000, "eval_re": [64.84079931834955, 60.6302525665914, 
53.35415955237729, 349.2461719487337, 236.86549480262255, 212.90169644471152, 
8.466672854654426, 54.90610621122457, -4.005605251609107, 20.86741536089141], 
"eval_len": [145, 159, 215, 1000, 438, 1000, 87, 164, 88, 31]}

 39%|███▉      | 389999/1000000 [7:10:40<8:11:00, 20.71it/s]global step 390000, trans_decision ep_re 76.43860806780043

{"global_step": 390000, "eval_re": [56.71826889921322, 34.31145852153571, 
70.98408235498817, 56.352982992004215, 157.5158224203846, 12.020138216140774, 
-10.552176485440139, 30.723082963253827, 113.32686586861357, 
242.98555492731035], "eval_len": [75, 32, 120, 101, 1000, 501, 101, 86, 1000, 
724]}

 40%|███▉      | 399998/1000000 [7:21:50<7:56:25, 20.99it/s]global step 400000, trans_decision ep_re 94.11640259325354

{"global_step": 400000, "eval_re": [46.99660278701113, 1.3878690584168092, 
140.22028256788667, 24.773906400178745, 30.52760710857573, 117.87612778329333, 
19.401254986732962, 345.5678383807328, 87.35424007976613, 127.05829677994105], 
"eval_len": [64, 137, 263, 158, 214, 153, 28, 805, 225, 369]}

 41%|████      | 409998/1000000 [7:32:50<7:49:45, 20.93it/s]global step 410000, trans_decision ep_re 117.06927894586212

{"global_step": 410000, "eval_re": [14.512675194599515, 65.60967661698481, 
330.99894342220205, 124.71274937940179, 17.763112808051716, 49.587048790506145, 
201.7475856728063, 29.142931065277033, 298.449750202767, 38.16831630602486], 
"eval_len": [98, 123, 587, 237, 36, 105, 1000, 49, 543, 105]}

 42%|████▏     | 419999/1000000 [7:43:50<7:40:06, 21.01it/s]global step 420000, trans_decision ep_re 134.79376174292682

{"global_step": 420000, "eval_re": [17.664917646781852, 26.32168313609069, 
30.22428690691718, 225.0350098757776, 224.74276964860175, 304.55088905556136, 
241.3502610149233, 135.73736442980038, 50.1201870953049, 92.19024861950915], 
"eval_len": [454, 1000, 99, 1000, 547, 1000, 1000, 1000, 79, 298]}

 43%|████▎     | 429999/1000000 [7:55:10<7:38:16, 20.73it/s]global step 430000, trans_decision ep_re 122.29305681530191

{"global_step": 430000, "eval_re": [-8.168817489200224, 75.68574940503115, 
4.539824618013808, 222.40618603979098, 252.51879263678276, 143.07391969848666, 
14.39758629155504, 196.59991592231577, 284.4795164284815, 37.397894601761614], 
"eval_len": [121, 116, 54, 362, 768, 300, 99, 528, 1000, 68]}

 44%|████▍     | 439999/1000000 [8:06:10<7:32:35, 20.62it/s]global step 440000, trans_decision ep_re 124.70648932805595

{"global_step": 440000, "eval_re": [15.201204747735327, 391.5223061698596, 
36.646541389981515, 74.62503992838789, 336.9391061633407, 77.42061417009332, 
100.00795772292251, -0.1879186991719406, 140.69320972785738, 74.1968319595533], 
"eval_len": [121, 771, 196, 79, 619, 122, 112, 20, 272, 153]}

 45%|████▍     | 449999/1000000 [8:17:10<7:22:10, 20.73it/s]global step 450000, trans_decision ep_re 61.243248807070394

{"global_step": 450000, "eval_re": [138.88346516791268, 20.371457489725895, 
67.60636821877824, 6.420394392103573, 146.0271684407123, 52.65412439583372, 
20.543684407374993, 46.49230233912663, 37.68105204255917, 75.75247117657673], 
"eval_len": [292, 38, 113, 49, 1000, 140, 33, 99, 108, 159]}

 46%|████▌     | 459999/1000000 [8:28:10<7:14:31, 20.71it/s]global step 460000, trans_decision ep_re 116.82471816007902

{"global_step": 460000, "eval_re": [34.07908483782828, 217.40258335154303, 
133.40958498357236, 53.281120691595916, 70.74241172225348, 210.95970616543434, 
212.79738582806442, 34.97788710959769, 177.22927914340164, 23.368137767498755], 
"eval_len": [109, 837, 288, 159, 264, 1000, 1000, 58, 1000, 52]}

 47%|████▋     | 469999/1000000 [8:39:20<7:06:15, 20.72it/s]global step 470000, trans_decision ep_re 199.69870768483247

{"global_step": 470000, "eval_re": [253.347898222886, 29.825035977807687, 
195.457105242576, 388.16184108792766, 161.78784616303474, 185.9861779271813, 
200.3201348375754, 252.9176389976013, 207.4000612482255, 121.78333714350906], 
"eval_len": [1000, 192, 615, 1000, 934, 1000, 1000, 1000, 534, 1000]}

 48%|████▊     | 479999/1000000 [8:50:30<6:54:44, 20.90it/s]global step 480000, trans_decision ep_re 153.3104599716566

{"global_step": 480000, "eval_re": [157.13202988203702, 304.5712814751404, 
519.2483620061786, 80.2442253522814, 93.21166782763844, 79.39289446259116, 
142.86941546375772, 14.934789266690505, 3.0674948887772775, 138.43243909147344],
"eval_len": [1000, 649, 994, 165, 1000, 207, 251, 28, 38, 1000]}

 49%|████▉     | 489999/1000000 [9:01:50<6:51:16, 20.67it/s]global step 490000, trans_decision ep_re 77.31290069258245

{"global_step": 490000, "eval_re": [312.9480611619656, 20.5328655340742, 
83.65206851383067, 46.37147524688994, 31.25513882990004, 67.5001927594409, 
14.652424186724481, 103.76481674629616, -3.3567494781202685, 95.80871342482273],
"eval_len": [1000, 14, 152, 139, 144, 109, 38, 226, 63, 460]}

 50%|████▉     | 499999/1000000 [9:12:40<6:43:02, 20.68it/s]global step 500000, trans_decision ep_re 103.10013526136626

{"global_step": 500000, "eval_re": [-0.6704888392921253, 53.894212249292465, 
83.99028491071213, 56.62439738362974, 293.7488185739283, 174.3751355648631, 
78.19222004445015, 287.49816587085996, -7.11495372710835, 10.4635605823272], 
"eval_len": [66, 129, 189, 87, 1000, 403, 110, 1000, 116, 17]}

 51%|█████     | 509999/1000000 [9:23:50<6:32:24, 20.81it/s]global step 510000, trans_decision ep_re 141.3234085330576

{"global_step": 510000, "eval_re": [186.76221044186758, 293.15433913516216, 
35.91803539148217, 50.44631054548865, 103.3821596754207, 76.42025617533191, 
299.66717080987627, 264.4975722727708, 1.1873387464706737, 101.79869213670509], 
"eval_len": [1000, 1000, 38, 195, 319, 182, 1000, 1000, 36, 411]}

 52%|█████▏    | 519999/1000000 [9:34:50<6:27:36, 20.64it/s]global step 520000, trans_decision ep_re 117.55133389042942

{"global_step": 520000, "eval_re": [64.88895057312337, -0.40680650589001566, 
205.91281296587638, 217.4605999290767, 37.47951447247069, 238.04936270089738, 
2.6915603277860667, 93.23113191089132, 296.11405558434245, 20.09215694572011], 
"eval_len": [257, 66, 1000, 1000, 141, 787, 110, 152, 638, 99]}

 53%|█████▎    | 529999/1000000 [9:46:00<6:21:29, 20.53it/s]global step 530000, trans_decision ep_re 118.76487413060833

{"global_step": 530000, "eval_re": [145.31007954949493, 89.48307010879522, 
92.0263334779769, 25.08240400469715, 187.70563730155294, 68.45992015853516, 
-8.389440344034684, 323.2623029981559, 187.2938281076469, 77.4146059432631], 
"eval_len": [478, 307, 304, 37, 811, 92, 20, 1000, 402, 155]}

 54%|█████▍    | 539998/1000000 [9:57:00<6:10:10, 20.71it/s]global step 540000, trans_decision ep_re 197.25531912579586

{"global_step": 540000, "eval_re": [-3.814421232838785, 365.5375621773893, 
355.0570171567866, 229.68042098692533, 43.876366040717485, 304.02948506692684, 
125.62339462765706, 54.95932885914285, 250.79148381926996, 246.8125537559818], 
"eval_len": [40, 1000, 1000, 1000, 109, 1000, 292, 97, 1000, 419]}

 55%|█████▍    | 549999/1000000 [10:08:10<6:00:20, 20.81it/s]global step 550000, trans_decision ep_re 139.67461939687053

{"global_step": 550000, "eval_re": [-4.752903691672584, 189.6362683077754, 
146.57878703403028, 218.35092380811065, 59.894202169774054, 187.12016908054468, 
304.0127722589443, 184.05334727384468, 113.13949515652762, -1.2868674291737203],
"eval_len": [75, 594, 359, 1000, 75, 572, 1000, 394, 175, 39]}

 56%|█████▌    | 559999/1000000 [10:19:20<5:52:40, 20.79it/s]global step 560000, trans_decision ep_re 97.75775297078569

{"global_step": 560000, "eval_re": [10.632067771553443, 28.806661317828013, 
33.719524919219566, 189.9646865414931, 41.01689947049222, 101.14046712483132, 
324.14773013531527, 150.08595968646105, 79.75221133720237, 18.311321403460347], 
"eval_len": [78, 184, 189, 1000, 133, 543, 1000, 271, 286, 89]}

 57%|█████▋    | 569998/1000000 [10:30:20<5:41:23, 20.99it/s]global step 570000, trans_decision ep_re 124.32202211043764

{"global_step": 570000, "eval_re": [185.16522033507198, 55.69627234107224, 
17.49483988116925, 119.24333132095701, 0.8460910214351935, 205.84273698136317, 
179.61129669988452, 150.25399627337833, 59.528795277900024, 269.53764097214463],
"eval_len": [1000, 88, 37, 336, 39, 1000, 1000, 657, 159, 584]}

 58%|█████▊    | 579998/1000000 [10:41:30<5:32:36, 21.05it/s]global step 580000, trans_decision ep_re 90.20774294400232

{"global_step": 580000, "eval_re": [56.13656474261554, 32.82946943801546, 
105.58990515917044, 83.50256138388339, 4.7449042468333715, 275.6742403670538, 
79.04700677734346, 72.33195521638848, 66.98654659308555, 125.23427551563388], 
"eval_len": [104, 40, 402, 317, 50, 568, 285, 134, 1000, 228]}

 59%|█████▉    | 589998/1000000 [10:52:30<5:27:07, 20.89it/s]global step 590000, trans_decision ep_re 137.64208336301135

{"global_step": 590000, "eval_re": [195.18748666507776, 9.12326867314697, 
107.46962715676464, 229.095354513054, 140.7097289637503, 96.43728940964677, 
336.955744019158, 3.7942639826067825, 239.6569839685037, 17.99108627840452], 
"eval_len": [315, 322, 243, 1000, 472, 268, 1000, 19, 1000, 80]}

 60%|█████▉    | 599999/1000000 [11:03:40<5:20:49, 20.78it/s]global step 600000, trans_decision ep_re 60.435599779968285

{"global_step": 600000, "eval_re": [27.291409404143256, 21.499039125016708, 
-35.834462069374936, 244.07324275057852, -2.1267656342154857, 
63.446125990528735, 266.47446640851814, -1.0772936267062057, 0.383609537192092, 
20.226625914001943], "eval_len": [32, 17, 1000, 1000, 44, 103, 1000, 40, 22, 
114]}

 61%|██████    | 609999/1000000 [11:14:40<5:13:55, 20.71it/s]global step 610000, trans_decision ep_re 148.64669469896413

{"global_step": 610000, "eval_re": [40.4974063670845, 5.24669204391664, 
18.760848835444413, 104.64748267019117, 229.35090615727668, 432.54255311160614, 
219.55714463695543, 279.0801622770013, 67.36875130991102, 89.41499958025385], 
"eval_len": [156, 44, 178, 351, 665, 1000, 535, 1000, 334, 267]}

 62%|██████▏   | 619999/1000000 [11:25:50<5:07:11, 20.62it/s]global step 620000, trans_decision ep_re 134.25505292691693

{"global_step": 620000, "eval_re": [106.29237985002982, 267.50779772072747, 
58.970555718602434, 262.441642822729, 2.6228631211015543, 11.037191073228156, 
53.88934569715746, 246.42670071794362, 258.09918925300525, 75.26286329464469], 
"eval_len": [293, 1000, 1000, 1000, 42, 71, 111, 1000, 791, 143]}

 63%|██████▎   | 629999/1000000 [11:37:00<4:57:58, 20.70it/s]global step 630000, trans_decision ep_re 95.25034343695607

{"global_step": 630000, "eval_re": [-0.6261628156713805, 0.4680172833210614, 
68.42190395748979, 176.33745442941608, 62.20341474524494, 6.999258335784988, 
281.95820074670326, 1.2765735001353322, 165.99682324385603, 189.46795094328058],
"eval_len": [32, 14, 204, 1000, 286, 59, 940, 28, 519, 1000]}

 64%|██████▍   | 639999/1000000 [11:48:00<4:49:43, 20.71it/s]global step 640000, trans_decision ep_re 159.94945986798595

{"global_step": 640000, "eval_re": [255.47142958347453, 28.49205771500023, 
24.59059907288736, 370.7348360303537, 10.400891269701921, 206.9032492419157, 
107.11790132623908, 24.45900158048688, 238.28146414074703, 333.0431687190533], 
"eval_len": [1000, 42, 91, 1000, 47, 547, 363, 27, 1000, 704]}

 65%|██████▍   | 649999/1000000 [11:59:10<4:41:04, 20.75it/s]global step 650000, trans_decision ep_re 149.16008730857877

{"global_step": 650000, "eval_re": [42.0674081306946, 104.96086977003189, 
112.24133144461342, 13.639218476440629, 88.49720475259562, 298.0084583369156, 
346.6664394071089, 33.44047758253763, 204.36678298816676, 247.71268219668255], 
"eval_len": [254, 378, 326, 288, 231, 1000, 1000, 109, 1000, 1000]}

 66%|██████▌   | 659999/1000000 [12:10:20<4:34:09, 20.67it/s]global step 660000, trans_decision ep_re 145.65169210599322

{"global_step": 660000, "eval_re": [42.57034727097231, 248.38393874526764, 
130.0551364815585, 257.6097473979961, 69.31048346694769, 218.1674787128801, 
48.65275941926278, 1.1677495759574434, 195.369167105469, 245.2301128836207], 
"eval_len": [153, 1000, 415, 1000, 227, 556, 96, 38, 668, 1000]}

 67%|██████▋   | 669999/1000000 [12:21:30<4:25:42, 20.70it/s]global step 670000, trans_decision ep_re 195.45204135456146

{"global_step": 670000, "eval_re": [340.7716412965238, 418.5102570656148, 
73.58391411945975, 414.4057754216542, 44.44294995304227, -9.590240795217637, 
51.77708504110414, 350.1374863840063, 241.49822803262293, 28.983317026803615], 
"eval_len": [624, 1000, 140, 993, 114, 85, 118, 1000, 1000, 93]}

 68%|██████▊   | 679999/1000000 [12:32:50<4:16:57, 20.76it/s]global step 680000, trans_decision ep_re 155.51059014680777

{"global_step": 680000, "eval_re": [337.5973543815945, 323.61796864259486, 
14.046459145632424, 261.32707564506217, 37.50128918625408, 79.34597002955667, 
348.54892083088987, 5.585471757128346, 51.1168312124524, 96.41856063691229], 
"eval_len": [1000, 1000, 31, 1000, 126, 264, 1000, 32, 193, 470]}

 69%|██████▉   | 689999/1000000 [12:43:50<4:11:01, 20.58it/s]global step 690000, trans_decision ep_re 244.77717978638657

{"global_step": 690000, "eval_re": [138.38581073250074, 178.2803982393475, 
439.3699986953334, 313.7005160832828, 9.558116735771769, 368.0760946181143, 
102.95180871499703, 297.25780207127974, 247.13727520875432, 353.0539767644841], 
"eval_len": [432, 437, 1000, 1000, 38, 1000, 210, 1000, 470, 1000]}

 70%|██████▉   | 699999/1000000 [12:55:10<4:02:23, 20.63it/s]global step 700000, trans_decision ep_re 147.71789111852866

{"global_step": 700000, "eval_re": [157.26116512054955, 9.210389818964732, 
253.48644214825188, 26.436464431034192, 304.8592819086029, 248.5350713783845, 
14.010812913782152, 344.16549230039124, 2.8844119006506874, 116.32937926467483],
"eval_len": [549, 39, 1000, 40, 939, 1000, 77, 1000, 49, 310]}

 71%|███████   | 709999/1000000 [13:06:20<3:53:39, 20.69it/s]global step 710000, trans_decision ep_re 90.93250319315932

{"global_step": 710000, "eval_re": [196.34492172220027, 40.92324785077326, 
48.707797907612296, 77.72782934046813, 212.0331174802752, 31.795839475522744, 
21.05768860024358, 58.37148918830076, 50.542743037033084, 171.8203573291638], 
"eval_len": [422, 277, 139, 193, 967, 110, 69, 121, 146, 450]}

 72%|███████▏  | 719999/1000000 [13:17:20<3:47:07, 20.55it/s]global step 720000, trans_decision ep_re 198.30286812737702

{"global_step": 720000, "eval_re": [299.5660725579795, 300.5268145386921, 
301.67397137170633, 285.74573622201945, 15.726078321732437, 39.67597042602756, 
224.14419207738877, 16.196766177566488, 326.82279364091875, 172.95028593973888],
"eval_len": [1000, 1000, 1000, 1000, 93, 59, 567, 30, 1000, 478]}

 73%|███████▎  | 729997/1000000 [13:28:30<3:37:14, 20.71it/s]global step 730000, trans_decision ep_re 39.34676997168617

{"global_step": 730000, "eval_re": [48.83190421863601, 92.81874969182428, 
20.345995653859617, 339.7343028191131, 428.81803570108815, -657.0345951118131, 
204.460510451113, 338.792702563225, 21.737047373337877, -445.0369536435222], 
"eval_len": [64, 159, 93, 1000, 1000, 1000, 719, 581, 146, 1000]}

 74%|███████▍  | 739999/1000000 [13:39:50<3:28:37, 20.77it/s]global step 740000, trans_decision ep_re 63.20964228280353

{"global_step": 740000, "eval_re": [48.001800925726826, 23.723200966441837, 
130.4225380420186, -22.33153740387724, 323.84743332231824, 7.756792460347756, 
23.189475026711722, 31.934274562032552, 42.74424249618004, 22.80820243013504], 
"eval_len": [119, 32, 227, 108, 1000, 38, 126, 36, 91, 124]}

 75%|███████▍  | 749998/1000000 [13:50:40<3:18:35, 20.98it/s]global step 750000, trans_decision ep_re 160.11893026855392

{"global_step": 750000, "eval_re": [333.4093342200857, 2.963196065537552, 
211.97880870003593, 420.544971535424, 265.2695045183855, 55.96384097450864, 
11.90867022052763, 15.782616633039568, 269.8818096248514, 13.486550193143275], 
"eval_len": [644, 26, 612, 869, 1000, 131, 32, 150, 715, 97]}

 76%|███████▌  | 759999/1000000 [14:01:50<3:13:38, 20.66it/s]global step 760000, trans_decision ep_re 213.19260493596508

{"global_step": 760000, "eval_re": [217.04572775181148, 186.81023225240256, 
254.76747044745298, 414.98264296214273, 358.42905449028115, 362.29888917971397, 
48.60899497673612, 5.543521730619146, 10.404289102902972, 273.03522646558764], 
"eval_len": [478, 442, 1000, 1000, 1000, 1000, 181, 60, 41, 1000]}

 77%|███████▋  | 769998/1000000 [14:13:00<3:04:20, 20.80it/s]global step 770000, trans_decision ep_re 96.49104385605486

{"global_step": 770000, "eval_re": [23.590819743029275, 204.24636750884633, 
88.27685307589707, 208.24621293178072, 74.76373638440892, 153.8431637601798, 
47.481316717277544, 135.75526328503193, 16.528406417039324, 12.178298737057453],
"eval_len": [46, 1000, 209, 1000, 171, 462, 165, 282, 267, 54]}

 78%|███████▊  | 779999/1000000 [14:24:10<2:56:21, 20.79it/s]global step 780000, trans_decision ep_re 72.63030649325167

{"global_step": 780000, "eval_re": [11.242263619257278, 51.717035616189804, 
37.171783978357496, -9.189834437590969, 92.42431019462273, 133.8802465833965, 
29.987960228787422, 20.412695563479776, 89.15848970898975, 269.4981138770269], 
"eval_len": [159, 111, 188, 149, 293, 372, 103, 32, 306, 663]}

 79%|███████▉  | 789999/1000000 [14:35:00<2:49:40, 20.63it/s]global step 790000, trans_decision ep_re 112.14847622858947

{"global_step": 790000, "eval_re": [1.711826132614371, -4.624322652377524, 
132.32137738237964, 331.64071270862746, 5.954253032542266, 62.389885417432566, 
280.39748455020396, 133.7940647729764, 161.73418747935568, 16.16529346213972], 
"eval_len": [53, 48, 379, 706, 33, 229, 1000, 270, 691, 1000]}

 80%|███████▉  | 799998/1000000 [14:46:10<2:38:55, 20.97it/s]global step 800000, trans_decision ep_re 178.29246578847136

{"global_step": 800000, "eval_re": [241.35978423039123, 25.10104329109286, 
55.926583241206586, 273.79266075821636, 232.13770005557743, 380.4982195138901, 
281.16780272603705, 44.668002804317965, 190.56270145814125, 57.71015980584301], 
"eval_len": [701, 46, 195, 1000, 1000, 1000, 1000, 79, 443, 409]}

 81%|████████  | 809999/1000000 [14:57:30<2:33:01, 20.69it/s]global step 810000, trans_decision ep_re 217.50708024886754

{"global_step": 810000, "eval_re": [225.279729174382, 74.3442241597353, 
113.60450762260436, 341.9380960717077, 350.39297465687696, 299.56437443008895, 
116.20359430336329, 298.52470232004043, 342.90882664869855, 12.309773101177448],
"eval_len": [633, 348, 411, 1000, 1000, 1000, 310, 1000, 1000, 24]}

 82%|████████▏ | 819999/1000000 [15:08:41<2:24:57, 20.70it/s]global step 820000, trans_decision ep_re 138.14682341215607

{"global_step": 820000, "eval_re": [28.564467495887612, 57.23311347880826, 
54.333706834881696, 345.7510688400801, 26.482946671740358, 34.07798019970585, 
228.35070051226455, 356.5908497307397, 79.85623399442348, 170.227166363029], 
"eval_len": [49, 269, 111, 1000, 51, 200, 486, 795, 202, 668]}

 83%|████████▎ | 829999/1000000 [15:19:41<2:16:55, 20.69it/s]global step 830000, trans_decision ep_re 100.5699459714611

{"global_step": 830000, "eval_re": [52.14147294071422, 26.899495182265458, 
108.31380270165694, 129.56151949247604, 223.39472388288206, 191.88857048302512, 
176.1192623893241, 10.946702381528294, 80.12197728014293, 6.311932980595857], 
"eval_len": [62, 110, 323, 334, 1000, 603, 316, 28, 152, 39]}

 84%|████████▍ | 839999/1000000 [15:30:41<2:08:25, 20.77it/s]global step 840000, trans_decision ep_re 106.98034478207516

{"global_step": 840000, "eval_re": [42.866704672993386, 22.202957239916003, 
53.82037645802074, 138.9454577390692, 8.438625495645113, 223.14718711107597, 
147.96473279643135, 17.66765432301807, 340.39881079032205, 74.35094119425972], 
"eval_len": [211, 40, 149, 334, 39, 1000, 441, 18, 1000, 225]}

 85%|████████▍ | 849999/1000000 [15:41:51<2:00:51, 20.69it/s]global step 850000, trans_decision ep_re 78.14869431370387

{"global_step": 850000, "eval_re": [175.2685066912044, 263.4030223253087, 
234.11295273285342, 22.816741189294067, 212.33267593306232, 297.4733443354816, 
220.70910933170526, 17.258856975296442, -955.2778108125381, 293.38954443537045],
"eval_len": [526, 1000, 1000, 111, 500, 1000, 1000, 41, 1000, 1000]}

 86%|████████▌ | 859999/1000000 [15:53:11<1:52:29, 20.74it/s]global step 860000, trans_decision ep_re 180.4706818635118

{"global_step": 860000, "eval_re": [51.188706014858894, 248.71360943408135, 
361.83476910891613, 281.9786066080493, 162.1244901604283, 148.21774372841332, 
15.918978629856609, 131.51136455537645, 314.39284163964567, 88.82570875549206], 
"eval_len": [131, 572, 1000, 1000, 543, 369, 39, 398, 1000, 250]}

 87%|████████▋ | 869999/1000000 [16:04:11<1:44:33, 20.72it/s]global step 870000, trans_decision ep_re 142.49893686725443

{"global_step": 870000, "eval_re": [18.43965845547997, 309.38087739183993, 
13.34106350281167, 12.707985760731487, 36.57776696415482, 340.1569298618023, 
296.7358925427124, 260.0442094110159, 9.42096775338815, 128.18401702860777], 
"eval_len": [39, 1000, 109, 157, 116, 1000, 1000, 1000, 22, 433]}

 88%|████████▊ | 879998/1000000 [16:15:21<1:35:35, 20.92it/s]global step 880000, trans_decision ep_re 97.64861838760044

{"global_step": 880000, "eval_re": [105.66541521193672, 39.180903231827095, 
179.32085398359217, 139.55963997554215, 139.46032885380197, 5.538987323375049, 
38.73678334930603, 328.2953670922151, -12.846706266803915, 13.57461112121204], 
"eval_len": [233, 137, 579, 1000, 455, 39, 591, 1000, 30, 37]}

 89%|████████▉ | 889999/1000000 [16:26:31<1:28:09, 20.80it/s]global step 890000, trans_decision ep_re 160.07667870584035

{"global_step": 890000, "eval_re": [302.9292269757259, 313.24155915091416, 
155.7619809230056, 41.64676405030008, 335.69705123239714, 140.9639452338733, 
162.14063061654142, 72.30821795977607, 21.88256735754626, 54.19484355832347], 
"eval_len": [1000, 1000, 315, 131, 1000, 343, 350, 210, 38, 130]}

 90%|████████▉ | 899999/1000000 [16:37:31<1:21:10, 20.53it/s]global step 900000, trans_decision ep_re 177.47862710050686

{"global_step": 900000, "eval_re": [53.58371251466154, 325.0920995538043, 
164.59574245654937, 177.60437400238024, 29.821327905068166, 190.35497555957448, 
252.6631309456706, 79.43886236034042, 270.1454209927916, 231.48662471422773], 
"eval_len": [263, 1000, 691, 666, 1000, 368, 1000, 611, 1000, 560]}

 91%|█████████ | 909999/1000000 [16:48:51<1:12:15, 20.76it/s]global step 910000, trans_decision ep_re 150.73853579467018

{"global_step": 910000, "eval_re": [7.57043153764446, 306.34942064563757, 
169.40099430412366, 141.941264530665, 270.47963928699323, 46.13180393024435, 
57.73595344581381, 564.6062761374318, -13.339105001741478, -43.49132087011068], 
"eval_len": [35, 1000, 1000, 654, 1000, 95, 119, 1000, 39, 1000]}

 92%|█████████▏| 919998/1000000 [17:00:01<1:03:20, 21.05it/s]global step 920000, trans_decision ep_re 226.47056344373104

{"global_step": 920000, "eval_re": [364.2402300337973, 315.66308960635274, 
343.7946536473164, 294.17491748915626, 189.32278648605478, 288.6485964362361, 
46.250931531017535, 259.06003196766716, 35.03256768053753, 128.5178295591751], 
"eval_len": [1000, 1000, 856, 1000, 1000, 1000, 475, 1000, 93, 350]}

 93%|█████████▎| 929999/1000000 [17:11:21<56:38, 20.60it/s]global step 930000, trans_decision ep_re 96.43113281239327

{"global_step": 930000, "eval_re": [37.624622146056694, 70.52127259846087, 
89.62064437731624, 0.9644615929216429, 259.92992880892024, 151.26702302724806, 
109.98616219878144, 61.55974562236763, 74.47125884915039, 108.36620890270953], 
"eval_len": [38, 125, 188, 19, 722, 560, 163, 103, 141, 443]}

 94%|█████████▍| 939999/1000000 [17:22:21<48:05, 20.80it/s]global step 940000, trans_decision ep_re 106.82093569037227

{"global_step": 940000, "eval_re": [307.8986241836146, 229.24353404521977, 
129.52587250549493, 102.13140327119206, 187.00119548518373, 7.391677407999597, 
1.5656966122938445, -1.697402229342679, -5.842693031877563, 110.99144865394436],
"eval_len": [1000, 1000, 380, 515, 1000, 501, 82, 45, 35, 172]}

 95%|█████████▍| 949999/1000000 [17:33:21<40:00, 20.83it/s]global step 950000, trans_decision ep_re 159.39770850668805

{"global_step": 950000, "eval_re": [398.1681031464156, 331.460291847095, 
-2.239281254525511, 225.31306919392173, 260.8037720328901, 36.25488775123878, 
98.53885170946981, 192.34024330881448, 143.1185516325015, -89.78140430094128], 
"eval_len": [1000, 1000, 665, 1000, 1000, 67, 250, 1000, 233, 480]}

 96%|█████████▌| 959999/1000000 [17:44:51<32:38, 20.43it/s]global step 960000, trans_decision ep_re 164.08320237633473

{"global_step": 960000, "eval_re": [36.8172159532976, 101.84573426848888, 
53.35963843005306, 272.3544465783936, 141.5426707016247, 293.5818076430635, 
22.362374556773137, 210.86115969843112, 231.72296062331023, 276.3840153099114], 
"eval_len": [39, 269, 441, 1000, 359, 1000, 25, 480, 1000, 1000]}

 97%|█████████▋| 969999/1000000 [17:56:11<24:28, 20.44it/s]global step 970000, trans_decision ep_re 126.17750766141913

{"global_step": 970000, "eval_re": [218.9455428818145, -113.25910338831986, 
36.903456468515316, 262.8174479994015, 353.13052984455084, 29.937941914661376, 
145.4801741533872, 139.99755695145186, 33.944708066890705, 153.87682172183779], 
"eval_len": [590, 836, 112, 529, 1000, 81, 449, 275, 198, 440]}

 98%|█████████▊| 979999/1000000 [18:07:31<16:13, 20.55it/s]global step 980000, trans_decision ep_re 141.46351402272035

{"global_step": 980000, "eval_re": [288.3353805290383, 75.0397446122617, 
169.57314154247365, 106.14232565335173, 136.3209489056926, 121.96242916006533, 
35.23988939352728, 129.0189425014292, 77.15603138286338, 275.84630654650016], 
"eval_len": [1000, 493, 410, 183, 256, 231, 103, 185, 246, 1000]}

 99%|█████████▉| 989999/1000000 [18:18:41<08:03, 20.68it/s]global step 990000, trans_decision ep_re 81.88919635474731

{"global_step": 990000, "eval_re": [492.48706284912805, -180.92238910951264, 
16.18666849819868, 20.610077866427986, 56.84110727574763, 299.17009117759386, 
32.92963490263125, 205.77621756042194, 261.92682337966016, -386.11333085282376],
"eval_len": [1000, 1000, 35, 107, 79, 1000, 302, 1000, 1000, 1000]}

100%|█████████▉| 999999/1000000 [18:30:01<00:00, 20.59it/s]global step 1000000, trans_decision ep_re 164.6183813820724

{"global_step": 1000000, "eval_re": [254.27530177143382, 93.93815750001013, 
494.84120192933534, 149.87410459478767, 84.68392996574161, 119.45571164069483, 
228.57384200881097, 48.901357215080225, 43.10007787452096, 128.54012932030858], 
"eval_len": [1000, 171, 1000, 300, 284, 419, 1000, 141, 252, 320]}

100%|██████████| 1000000/1000000 [18:30:16<00:00, 15.01it/s]
