
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.2
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9998/1000000 [04:40<10:40:48, 25.75it/s]global step 10000, trans_decision ep_re -101.94180549389779

{"global_step": 10000, "eval_re": [-4.893929958523696, 15.347219168788408, 
-296.30669746582066, -15.594633946687635, -45.475395654318454, 
-84.25218377516694, -36.18223230930836, -360.8592431235329, -161.99853585751111,
-29.202422016896598], "eval_len": [66, 91, 684, 35, 252, 193, 85, 1000, 1000, 
107]}

  2%|▏         | 19999/1000000 [13:30<10:47:30, 25.22it/s]global step 20000, trans_decision ep_re -15.374756599516928

{"global_step": 20000, "eval_re": [-10.094752204629279, 26.988175189386816, 
18.49766986178206, 9.094059484111767, 15.983498375192266, 17.46807398838847, 
19.608347540399446, 18.50281225196733, -209.00239006533306, -60.79306041643511],
"eval_len": [86, 90, 95, 40, 34, 95, 155, 90, 1000, 402]}

  3%|▎         | 29999/1000000 [22:20<10:41:34, 25.20it/s]global step 30000, trans_decision ep_re -19.818899200581477

{"global_step": 30000, "eval_re": [-30.115225761383524, -56.82214847477803, 
-23.90324466787103, 1.0293423609651464, 5.056988445178828, 12.938364849030652, 
12.766541967332843, -6.048918118282987, -27.254005656075947, 
-85.83668694993071], "eval_len": [1000, 66, 80, 92, 36, 69, 49, 68, 184, 1000]}

  4%|▍         | 39999/1000000 [31:10<10:31:28, 25.34it/s]global step 40000, trans_decision ep_re 7.485052030029948

{"global_step": 40000, "eval_re": [45.60960876075951, 8.183513115017659, 
2.0960549194198834, -34.89176209742698, 16.518466501444273, 10.1282441293334, 
9.642190985474649, 18.078536449169686, 11.837801923809096, -12.352134386701701],
"eval_len": [82, 24, 17, 140, 490, 33, 33, 394, 46, 116]}

  5%|▍         | 49999/1000000 [40:00<10:25:28, 25.31it/s]global step 50000, trans_decision ep_re -3.4933859224273363

{"global_step": 50000, "eval_re": [71.09980558574216, -40.68139925746197, 
-15.60043180647522, -72.80452802511222, -30.971350262085345, 
-109.35949134494139, 46.37359414543474, 2.6203354038940536, -6.004899917604733, 
120.39450625433659], "eval_len": [269, 108, 1000, 416, 824, 1000, 69, 61, 165, 
1000]}

  6%|▌         | 59999/1000000 [49:00<10:16:43, 25.40it/s]global step 60000, trans_decision ep_re 5.676412992955855

{"global_step": 60000, "eval_re": [-2.3753328224492254, 12.652239781494227, 
17.92094855587539, -1.1323971358675764, 24.333733473005143, 23.351147522492603, 
-47.99465890815644, 3.4429501085914467, 27.52284494793034, -0.9573455933573531],
"eval_len": [126, 1000, 36, 53, 40, 124, 163, 183, 1000, 32]}

  7%|▋         | 69999/1000000 [57:50<10:05:58, 25.58it/s]global step 70000, trans_decision ep_re 1.1327529381672128

{"global_step": 70000, "eval_re": [-14.938421857100204, -13.620549081024208, 
-28.79790110933988, 25.43454797834973, -15.463733222978934, 41.97272663552656, 
-31.34831109294153, 20.353686649977746, 45.60353136685811, -17.868046885655264],
"eval_len": [58, 121, 83, 172, 757, 164, 128, 257, 1000, 1000]}

  8%|▊         | 79998/1000000 [1:06:50<9:57:50, 25.65it/s]global step 80000, trans_decision ep_re 9.643576234272372

{"global_step": 80000, "eval_re": [-2.60322721133382, 3.282651835488634, 
9.90491209399184, 0.36051472123575523, 67.68770306103661, 9.377850862838834, 
78.91075262902619, -26.731060076693563, 29.875327836839915, -73.62966340970668],
"eval_len": [31, 33, 31, 211, 283, 113, 442, 164, 75, 135]}

  9%|▉         | 89999/1000000 [1:15:40<9:58:08, 25.36it/s]global step 90000, trans_decision ep_re 57.98228946220729

{"global_step": 90000, "eval_re": [-12.045677969623528, 36.98284094961624, 
21.71006695814169, 115.39357622089229, 68.49743700500039, 41.470830591464036, 
40.53582032829406, 38.46258896376658, 228.08670618462074, 0.7287053899003875], 
"eval_len": [58, 348, 33, 1000, 1000, 198, 116, 186, 1000, 133]}

 10%|▉         | 99997/1000000 [1:24:30<9:51:41, 25.35it/s]global step 100000, trans_decision ep_re 51.27722509993198

{"global_step": 100000, "eval_re": [21.854010293425883, 45.96024883124269, 
10.08822903993223, 30.40851214140243, -0.8203026049934445, 19.648201636865092, 
222.81399505998206, 17.204073685560587, 48.86856136544629, 96.74672155045596], 
"eval_len": [1000, 55, 1000, 232, 44, 1000, 1000, 31, 1000, 1000]}

 11%|█         | 109998/1000000 [1:33:30<9:37:06, 25.70it/s]global step 110000, trans_decision ep_re 78.18718964046914

{"global_step": 110000, "eval_re": [236.39504060350188, 26.72634478844811, 
90.34626672362646, 8.856693368165043, 99.09624794122168, 145.58736613239446, 
0.1325969271531955, 55.662457415583184, 35.94182991888174, 83.1270525857156], 
"eval_len": [1000, 63, 746, 25, 347, 1000, 88, 1000, 173, 566]}

 12%|█▏        | 119999/1000000 [1:42:30<9:38:55, 25.33it/s]global step 120000, trans_decision ep_re 63.496546305895365

{"global_step": 120000, "eval_re": [15.492976165417822, 12.162023928518288, 
140.24235341206122, 37.18979322779796, 125.8258046746337, -93.29170172544117, 
204.9144085811891, 144.3606682797318, 26.58971759714151, 21.47941891790348], 
"eval_len": [1000, 22, 502, 1000, 1000, 1000, 1000, 1000, 91, 36]}

 13%|█▎        | 129998/1000000 [1:51:40<9:26:24, 25.60it/s]global step 130000, trans_decision ep_re 123.14599670794539

{"global_step": 130000, "eval_re": [84.57348499211841, 80.33273258621806, 
245.5699626545152, 227.2020205569983, 34.3807380236294, 56.69899218640299, 
15.59041304226617, 201.51420578776307, 240.65309629189318, 44.94432095764905], 
"eval_len": [285, 254, 850, 872, 48, 1000, 126, 1000, 1000, 119]}

 14%|█▍        | 139998/1000000 [2:00:40<9:19:38, 25.61it/s]global step 140000, trans_decision ep_re 90.08407021020514

{"global_step": 140000, "eval_re": [28.414436154389985, 13.110653652045345, 
187.36088267444603, 106.19302730091556, 106.41527589428836, 4.540756439611332, 
110.371475219869, 56.614078339368255, 85.82943418897467, 201.99068223814285], 
"eval_len": [72, 29, 525, 1000, 221, 34, 538, 1000, 1000, 1000]}

 15%|█▍        | 149998/1000000 [2:09:40<9:12:14, 25.65it/s]global step 150000, trans_decision ep_re 58.20140214525069

{"global_step": 150000, "eval_re": [62.04971381722098, 48.15615873095529, 
-16.25957831674805, 156.72689823030055, 20.619658723248953, 161.48084727836383, 
46.69948646363015, -3.631441453301041, 12.284742155966745, 93.88753582286942], 
"eval_len": [183, 251, 42, 399, 22, 837, 97, 32, 58, 184]}

 16%|█▌        | 159999/1000000 [2:18:30<9:14:36, 25.24it/s]global step 160000, trans_decision ep_re 117.22825506721139

{"global_step": 160000, "eval_re": [6.8136028645458815, 240.64300648278137, 
152.66559269123047, 174.8812387513394, 26.88588656601277, 183.70318517209276, 
133.4525587259955, 154.67190604392266, 83.83458737108828, 14.730986003104766], 
"eval_len": [32, 814, 568, 1000, 202, 749, 637, 478, 278, 1000]}

 17%|█▋        | 169998/1000000 [2:27:30<8:59:23, 25.65it/s]global step 170000, trans_decision ep_re 105.28337593757735

{"global_step": 170000, "eval_re": [291.13676906749964, 159.17809247100507, 
24.15636946287814, 116.84211741456366, 34.78951621755793, 186.7802054921582, 
130.06524597887625, 6.644526826462846, 68.27346963821951, 34.967446806552246], 
"eval_len": [1000, 1000, 69, 435, 93, 690, 1000, 26, 217, 223]}

 18%|█▊        | 179999/1000000 [2:36:20<8:56:42, 25.46it/s]global step 180000, trans_decision ep_re 126.06308451688719

{"global_step": 180000, "eval_re": [0.7151079357695631, 100.34434479344065, 
137.7698872629924, 158.97078768832438, 98.77874898403884, 356.7146197857019, 
278.1948474929932, 115.83514229189066, 2.809265266180839, 10.498093667539642], 
"eval_len": [16, 245, 524, 1000, 267, 1000, 584, 198, 33, 87]}

 19%|█▉        | 189998/1000000 [2:45:20<8:42:26, 25.84it/s]global step 190000, trans_decision ep_re 167.97612376658728

{"global_step": 190000, "eval_re": [44.123634221282124, 108.9334935458088, 
122.04578193006245, 253.67704392140044, 70.43840477601057, 372.44917860691254, 
140.29679421606318, 201.62204379123762, 142.40964634655228, 223.76521631054268],
"eval_len": [188, 1000, 1000, 640, 151, 1000, 423, 621, 1000, 1000]}

 20%|█▉        | 199998/1000000 [2:54:20<8:41:44, 25.56it/s]global step 200000, trans_decision ep_re 100.9785018754009

{"global_step": 200000, "eval_re": [326.2617684231119, 110.82279850361039, 
76.96244648154251, 57.627016905993116, 77.92804113151405, 37.846761841443886, 
25.210170570427472, 185.76154897335223, 30.18557534546334, 81.17889057755008], 
"eval_len": [1000, 423, 1000, 122, 1000, 105, 53, 563, 101, 176]}

 21%|██        | 209997/1000000 [3:03:20<8:38:47, 25.38it/s]global step 210000, trans_decision ep_re 171.09340670703614

{"global_step": 210000, "eval_re": [40.66371457612571, 267.9873445691452, 
161.4362583822265, 294.49837332447163, 401.2097952093906, 43.19486031663586, 
317.77579703039197, 42.780724154185876, 124.42369677740909, 16.96350273037886], 
"eval_len": [104, 1000, 430, 901, 1000, 285, 1000, 213, 446, 73]}

 22%|██▏       | 219997/1000000 [3:12:20<8:33:23, 25.32it/s]global step 220000, trans_decision ep_re 102.14884421666576

{"global_step": 220000, "eval_re": [19.991245055420094, 77.2046466413372, 
188.91750217628692, 177.99445759943393, 8.75179964083816, 131.87856827726162, 
56.336531155070304, 18.65324909026437, 139.6866049586149, 202.07383757213023], 
"eval_len": [29, 321, 519, 418, 33, 293, 217, 52, 280, 844]}

 23%|██▎       | 229997/1000000 [3:21:10<8:24:47, 25.42it/s]global step 230000, trans_decision ep_re 148.97915492024478

{"global_step": 230000, "eval_re": [114.85319217573553, -3.784400513438889, 
191.76393836060083, 45.0328137031719, 265.08281297204167, 361.938982543786, 
227.26913062261545, 56.000928806406954, 197.5508507269038, 34.0832998046248], 
"eval_len": [241, 53, 1000, 65, 973, 1000, 1000, 158, 1000, 90]}

 24%|██▍       | 239997/1000000 [3:30:10<8:21:01, 25.28it/s]global step 240000, trans_decision ep_re 137.07312857185738

{"global_step": 240000, "eval_re": [25.78719956601134, 52.6669130270986, 
269.7098434401567, 115.90858968042816, 165.30017119049108, 301.5044963755602, 
44.169177326915005, 105.20877872529533, 148.1372255461164, 142.33889084050088], 
"eval_len": [66, 109, 1000, 441, 382, 1000, 84, 1000, 306, 1000]}

 25%|██▍       | 249997/1000000 [3:39:10<8:13:10, 25.35it/s]global step 250000, trans_decision ep_re 194.02310733467925

{"global_step": 250000, "eval_re": [182.523363434564, 107.51059536744378, 
337.77417867556034, 169.21432100055236, 284.70963867310104, 393.3176024826951, 
203.86138505027097, 26.39519271431228, 72.814605004944, 162.11019094334867], 
"eval_len": [541, 150, 1000, 1000, 1000, 1000, 1000, 95, 106, 261]}

 26%|██▌       | 259999/1000000 [3:48:10<8:04:01, 25.48it/s]global step 260000, trans_decision ep_re 106.00035187689093

{"global_step": 260000, "eval_re": [35.38629338804263, 230.37906945600002, 
82.5577270803322, 171.54850246759193, 144.35946604333296, 30.590403638916708, 
142.60998424364362, 192.66394920905338, 29.979931666386207, 
-0.07180842439046747], "eval_len": [96, 569, 203, 1000, 1000, 202, 1000, 1000, 
59, 19]}

 27%|██▋       | 269997/1000000 [3:57:10<8:00:52, 25.30it/s]global step 270000, trans_decision ep_re 78.27566394210467

{"global_step": 270000, "eval_re": [70.37732736183364, 37.899403343823444, 
1.9540259273200746, 2.387047068061931, 177.06879860205004, 36.001256963194045, 
2.1503500038721017, 44.491820033114834, 64.28340145691557, 346.1432086608611], 
"eval_len": [109, 43, 15, 16, 535, 32, 30, 76, 390, 1000]}

 28%|██▊       | 279997/1000000 [4:06:00<7:52:40, 25.39it/s]global step 280000, trans_decision ep_re 99.19160112108051

{"global_step": 280000, "eval_re": [263.7566679631874, 38.79862455187629, 
10.105297720875196, 183.27912342794878, 14.097498992348704, 71.61820787800501, 
42.33880742835226, 118.35773594440609, 209.0323196213454, 40.5317276824599], 
"eval_len": [819, 75, 31, 1000, 15, 144, 234, 260, 755, 137]}

 29%|██▉       | 289998/1000000 [4:14:50<7:38:53, 25.79it/s]global step 290000, trans_decision ep_re 149.50722548542768

{"global_step": 290000, "eval_re": [69.70033101106387, 63.49470303567602, 
166.7276174608188, 236.3798372330725, 231.87222653677344, 319.6840862150047, 
186.7204880006444, 166.62509206159476, 28.125793829706684, 25.742079469921464], 
"eval_len": [119, 227, 1000, 769, 1000, 1000, 1000, 441, 82, 48]}

 30%|██▉       | 299999/1000000 [4:23:50<7:36:28, 25.56it/s]global step 300000, trans_decision ep_re 112.49685405007656

{"global_step": 300000, "eval_re": [18.37335261634965, 188.12302009366906, 
146.6298648293927, 421.6167903594366, -4.221858487348556, 230.3432514667759, 
19.73333594654867, 45.31603994896488, 40.77251188975494, 18.282231837221754], 
"eval_len": [101, 320, 479, 1000, 38, 577, 31, 170, 159, 35]}

 31%|███       | 309998/1000000 [4:32:40<7:21:38, 26.04it/s]global step 310000, trans_decision ep_re 77.72538924993684

{"global_step": 310000, "eval_re": [84.30446450250342, 163.0238733228523, 
116.75104895615077, 173.43545501800872, 34.805740260576435, 188.64335407798254, 
0.6001593281251294, 0.07344588329367518, 5.035276268945678, 10.581074880929572],
"eval_len": [259, 373, 174, 405, 72, 344, 1000, 29, 34, 33]}

 32%|███▏      | 319997/1000000 [4:41:30<7:26:11, 25.40it/s]global step 320000, trans_decision ep_re 237.022288150445

{"global_step": 320000, "eval_re": [200.64730618820616, 167.3937115700559, 
453.10994063808505, 142.7195487887038, 512.900822615965, 254.19166459137048, 
313.2909580055323, 25.863422275379964, 2.4407485141665117, 297.66475831698534], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 636, 66, 32, 478]}

 33%|███▎      | 329997/1000000 [4:50:30<7:17:48, 25.51it/s]global step 330000, trans_decision ep_re 296.885529337713

{"global_step": 330000, "eval_re": [50.49301756808341, 155.22094830582023, 
490.77301151781967, 370.8596298511244, 335.17899058764846, 296.22467380685106, 
164.3213891051405, 398.161112707507, 434.0646607078793, 273.5578592192559], 
"eval_len": [75, 382, 1000, 937, 837, 534, 451, 1000, 1000, 967]}

 34%|███▍      | 339999/1000000 [4:59:30<7:07:50, 25.71it/s]global step 340000, trans_decision ep_re 210.24388886249162

{"global_step": 340000, "eval_re": [383.832484961892, 137.9788740657725, 
72.54558943576008, 234.53641420291453, 72.22046898567802, 12.702618175306227, 
98.66448967734999, 452.7731255885902, 221.9259510923799, 415.2588724392729], 
"eval_len": [1000, 1000, 272, 1000, 316, 38, 398, 1000, 573, 1000]}

 35%|███▍      | 349999/1000000 [5:08:30<7:01:35, 25.70it/s]global step 350000, trans_decision ep_re 140.6303379872558

{"global_step": 350000, "eval_re": [187.46501306239935, 94.58005138597076, 
211.57244070919592, 15.430754523425394, 14.121680712054182, 83.89841153879269, 
91.7495075698717, 395.87427192112193, 243.99236029136932, 67.61888815835657], 
"eval_len": [1000, 233, 558, 33, 56, 256, 386, 916, 1000, 189]}

 36%|███▌      | 359997/1000000 [5:17:20<6:56:22, 25.62it/s]global step 360000, trans_decision ep_re 110.08779168452938

{"global_step": 360000, "eval_re": [182.4133174049273, 245.32886397227193, 
190.2745399713774, 22.963471396632766, 6.143011232112699, 213.45146450424033, 
144.69601672068436, 61.86321853513353, 8.403201951973855, 25.34081115593949], 
"eval_len": [564, 331, 1000, 28, 27, 428, 355, 105, 47, 123]}

 37%|███▋      | 369999/1000000 [5:26:10<6:49:39, 25.63it/s]global step 370000, trans_decision ep_re 120.61751398521555

{"global_step": 370000, "eval_re": [18.12429462431093, 119.47289448591528, 
130.61358659826013, 127.16514632217185, 128.0053486599957, 288.3068549869036, 
240.73541234767936, 106.35979724927228, 46.33986726194626, 1.0519373157000613], 
"eval_len": [47, 199, 301, 232, 251, 1000, 487, 185, 98, 66]}

 38%|███▊      | 379999/1000000 [5:35:00<6:41:44, 25.72it/s]global step 380000, trans_decision ep_re 159.85452536740237

{"global_step": 380000, "eval_re": [121.04996218178536, 207.29943829174002, 
300.81544920973835, 272.9366083789367, 240.8322542534072, 75.01487792329772, 
105.34388246662509, 21.668454521135608, 239.26786160991006, 14.316464837447507],
"eval_len": [293, 454, 580, 583, 436, 111, 242, 62, 1000, 18]}

 39%|███▉      | 389999/1000000 [5:43:50<6:33:48, 25.82it/s]global step 390000, trans_decision ep_re 166.5547256050158

{"global_step": 390000, "eval_re": [295.3961676465398, 77.93721843154619, 
4.7100053943826685, 261.6085394403865, 146.4319093999834, 32.55567777511508, 
16.152894094865236, 210.57563322352124, 435.99452060660803, 184.1846900372097], 
"eval_len": [1000, 342, 32, 821, 190, 103, 18, 1000, 726, 1000]}

 40%|███▉      | 399997/1000000 [5:52:40<6:30:44, 25.59it/s]global step 400000, trans_decision ep_re 113.88684049599583

{"global_step": 400000, "eval_re": [157.85838247297875, 141.97476122620037, 
15.553648793769703, 196.59371256536525, 26.930207745575487, 98.50717871053082, 
348.095231491387, -9.12100573269261, 125.45730582880502, 37.01898185803873], 
"eval_len": [411, 493, 55, 394, 49, 257, 825, 126, 1000, 166]}

 41%|████      | 409998/1000000 [6:01:30<6:19:25, 25.92it/s]global step 410000, trans_decision ep_re 145.30115067738873

{"global_step": 410000, "eval_re": [13.321156583218878, 177.17098186617457, 
466.57389459611056, 95.33091424630588, 68.08325519312812, 14.679634572339511, 
157.7639423488803, 83.77419723840706, 103.01897628841856, 273.2945538409039], 
"eval_len": [16, 1000, 1000, 338, 298, 36, 321, 236, 261, 433]}

 42%|████▏     | 419998/1000000 [6:10:20<6:10:55, 26.06it/s]global step 420000, trans_decision ep_re 71.68066040882702

{"global_step": 420000, "eval_re": [30.390479602606014, 16.264770795902166, 
-5.5732035978726895, 79.03865504942104, 6.3910275176951155, 113.91641423409861, 
65.22283199895728, 11.83764519595236, 199.0999126987171, 200.21807059279325], 
"eval_len": [99, 31, 38, 115, 74, 1000, 144, 29, 516, 643]}

 43%|████▎     | 429998/1000000 [6:19:00<6:05:04, 26.02it/s]global step 430000, trans_decision ep_re 167.95081157265668

{"global_step": 430000, "eval_re": [129.23391238709257, 226.85710746754975, 
496.77918151842283, 155.92090581585535, 338.1117527235289, 0.8038234134113209, 
24.255020379666192, 166.86188973369036, 115.08820499316982, 25.596317294179737],
"eval_len": [265, 529, 1000, 296, 1000, 54, 387, 204, 241, 107]}

 44%|████▍     | 439998/1000000 [6:27:50<5:58:42, 26.02it/s]global step 440000, trans_decision ep_re 155.68697366458963

{"global_step": 440000, "eval_re": [112.98371148464193, 44.52826227617087, 
156.38207624518841, 192.19699921319642, 144.3248584750078, -3.649331495445949, 
206.62130109904095, 362.8519143229709, 327.1519780320409, 13.477966993083939], 
"eval_len": [337, 86, 228, 302, 287, 46, 270, 1000, 1000, 34]}

 45%|████▍     | 449999/1000000 [6:36:40<6:04:05, 25.18it/s]global step 450000, trans_decision ep_re 150.5976298080396

{"global_step": 450000, "eval_re": [329.010583534692, 404.25569564051574, 
238.4188023982973, 10.65228882492551, 145.1376100923177, 103.1418902154358, 
-200.74740439120086, 156.00559521346108, 27.396539969288973, 
292.70469658266313], "eval_len": [618, 925, 496, 21, 466, 411, 1000, 343, 178, 
1000]}

 46%|████▌     | 459999/1000000 [6:45:40<5:51:10, 25.63it/s]global step 460000, trans_decision ep_re 52.81048457469261

{"global_step": 460000, "eval_re": [145.64496263283266, 5.810700907394104, 
2.949236968045643, 92.61538433031282, 92.6239166416367, 54.850659185792956, 
0.7012475143001224, 111.75366722951189, 1.284960108037292, 19.870110229061925], 
"eval_len": [431, 145, 177, 400, 206, 140, 32, 1000, 38, 893]}

 47%|████▋     | 469997/1000000 [6:54:20<5:40:47, 25.92it/s]global step 470000, trans_decision ep_re 63.600240075917874

{"global_step": 470000, "eval_re": [23.271893511263556, 12.975750984935853, 
35.35336040719671, 243.29926667208886, 125.89462027974517, 9.273965995348416, 
15.83595654603341, -111.61771212951793, 22.60874139947576, 259.10655709260885], 
"eval_len": [60, 31, 80, 344, 289, 24, 33, 516, 26, 522]}

 48%|████▊     | 479999/1000000 [7:03:10<5:38:30, 25.60it/s]global step 480000, trans_decision ep_re 87.4372898700295

{"global_step": 480000, "eval_re": [60.33222567556499, 33.3397943822117, 
44.08861438663083, 48.53979712416132, 149.35917575708405, 160.02448484591136, 
20.423176342535857, 213.9066376938022, 1.7094665490522942, 142.64952594334028], 
"eval_len": [222, 67, 58, 90, 379, 374, 30, 491, 63, 1000]}

 49%|████▉     | 489999/1000000 [7:11:50<5:30:46, 25.70it/s]global step 490000, trans_decision ep_re 82.68076862905575

{"global_step": 490000, "eval_re": [104.87594622182094, 9.3248708423586, 
290.04347901238907, 37.26295274892301, 111.49981427527796, 76.67687642256575, 
-200.37358244659805, 18.594764217653022, 95.60233588686054, 283.30022910930666],
"eval_len": [188, 31, 1000, 90, 152, 213, 1000, 124, 345, 1000]}

 50%|████▉     | 499998/1000000 [7:20:40<5:19:02, 26.12it/s]global step 500000, trans_decision ep_re 188.06035740730917

{"global_step": 500000, "eval_re": [361.3582614230301, 309.48141347482056, 
353.2033483273995, 182.74015919681557, 58.72204875436825, 31.836714519674835, 
10.805843377199526, 50.370731012510085, 139.96697288347798, 382.11808110379513],
"eval_len": [1000, 467, 1000, 402, 96, 65, 63, 109, 219, 1000]}

 51%|█████     | 509998/1000000 [7:29:30<5:11:59, 26.18it/s]global step 510000, trans_decision ep_re 46.81236534825248

{"global_step": 510000, "eval_re": [42.81243711859708, 94.73053357966042, 
18.298717175339355, 200.9015016006253, 17.80622052207474, 27.890912269475372, 
8.154491488124693, -295.3718143190911, 171.33749665383283, 181.5631573938861], 
"eval_len": [118, 211, 62, 1000, 23, 97, 19, 1000, 332, 189]}

 52%|█████▏    | 519997/1000000 [7:38:20<5:10:47, 25.74it/s]global step 520000, trans_decision ep_re 182.46841270514858

{"global_step": 520000, "eval_re": [115.80268332592223, 42.81372207137249, 
58.56434666598456, 103.27139070708073, 479.6606559281121, 13.391419557548325, 
347.34056422157585, 16.21934505162434, 438.25465321060403, 209.36534631166128], 
"eval_len": [245, 212, 149, 1000, 868, 32, 617, 65, 964, 436]}

 53%|█████▎    | 529997/1000000 [7:47:10<5:04:31, 25.72it/s]global step 530000, trans_decision ep_re 176.9267810863332

{"global_step": 530000, "eval_re": [255.61705110373816, 344.27272818987535, 
20.458007665047987, 237.89781806280868, 360.9112046366186, -9.888887146616172, 
21.58073225876492, 287.13399073970993, 7.902836047018706, 243.38232930636573], 
"eval_len": [1000, 824, 30, 1000, 1000, 30, 29, 540, 27, 887]}

 54%|█████▍    | 539999/1000000 [7:56:00<4:57:59, 25.73it/s]global step 540000, trans_decision ep_re 143.27147126885507

{"global_step": 540000, "eval_re": [11.855032349882356, 269.58309890324114, 
117.67426264375293, 117.78917162293381, 180.50194142943923, 83.14435312561636, 
47.70493533423673, 230.4848886108921, 249.35469288923983, 124.62233577931613], 
"eval_len": [21, 1000, 168, 215, 285, 127, 205, 1000, 321, 248]}

 55%|█████▍    | 549998/1000000 [8:04:50<4:49:05, 25.94it/s]global step 550000, trans_decision ep_re 108.9275185520406

{"global_step": 550000, "eval_re": [20.927326320286184, 183.93852091947085, 
-2.0732498377123627, 9.133229825791293, 27.529565836690526, 71.60284026757161, 
40.725238778070675, 461.86122869867035, 281.095882850219, -5.465398138652077], 
"eval_len": [39, 370, 27, 20, 154, 132, 73, 1000, 1000, 101]}

 56%|█████▌    | 559998/1000000 [8:13:40<4:41:34, 26.04it/s]global step 560000, trans_decision ep_re 101.90803392793642

{"global_step": 560000, "eval_re": [230.905723675361, 37.97215478699131, 
278.2222463095029, 216.36507636765998, 16.628764155394546, 13.373821786364337, 
24.214550048170583, 22.92165521765638, 44.16657782242289, 134.3097691098401], 
"eval_len": [1000, 77, 1000, 532, 41, 34, 151, 26, 64, 334]}

 57%|█████▋    | 569999/1000000 [8:22:30<4:40:28, 25.55it/s]global step 570000, trans_decision ep_re 126.74616867660048

{"global_step": 570000, "eval_re": [143.8688384541667, 125.72402473057303, 
188.37478741019598, 49.01246544114982, 58.01621118206309, 172.79714848636166, 
241.38299327354358, 249.85855956675516, 4.33892666201908, 34.08773155917664], 
"eval_len": [439, 336, 1000, 102, 160, 1000, 707, 686, 31, 168]}

 58%|█████▊    | 579997/1000000 [8:31:20<4:32:20, 25.70it/s]global step 580000, trans_decision ep_re 158.289802627023

{"global_step": 580000, "eval_re": [275.7063175543949, 121.03239686020093, 
86.4049217022957, 214.5940263540834, 69.27997035393712, 149.87217888542486, 
251.35635266005934, 123.23480592506009, 51.22402122059459, 240.19303475417888], 
"eval_len": [552, 284, 202, 513, 131, 375, 1000, 331, 187, 1000]}

 59%|█████▉    | 589999/1000000 [8:40:10<4:25:52, 25.70it/s]global step 590000, trans_decision ep_re 73.19614100745925

{"global_step": 590000, "eval_re": [51.518055894626656, 261.19212538099026, 
61.507880870796555, 65.00861033368018, -39.08608488884086, 55.42898288205232, 
23.611592748912244, 58.98911872518587, 13.017860258975238, 180.77326786821394], 
"eval_len": [163, 1000, 118, 101, 1000, 165, 22, 303, 147, 635]}

 60%|█████▉    | 599997/1000000 [8:49:00<4:19:43, 25.67it/s]global step 600000, trans_decision ep_re 88.514498913935

{"global_step": 600000, "eval_re": [247.00701008225354, 101.59078928613103, 
90.24657072744716, 36.98990698759949, 82.86017208845759, 154.16506584453154, 
66.27997773185173, 63.392999204322436, 26.5587175174663, 16.053779669289266], 
"eval_len": [1000, 304, 119, 1000, 335, 596, 265, 252, 198, 33]}

 61%|██████    | 609999/1000000 [8:57:50<4:14:06, 25.58it/s]global step 610000, trans_decision ep_re 100.15026942187922

{"global_step": 610000, "eval_re": [122.95301993873598, -1.968688292174417, 
10.728274646291863, 44.43372919211769, 73.31755547554384, 114.00737286739741, 
21.962252312524463, 264.7762580518683, 210.62160571614407, 140.671314310343], 
"eval_len": [227, 33, 38, 149, 245, 299, 190, 1000, 459, 316]}

 62%|██████▏   | 619998/1000000 [9:06:40<4:02:20, 26.13it/s]global step 620000, trans_decision ep_re 146.52288941091268

{"global_step": 620000, "eval_re": [7.925881663643825, 37.74651384882828, 
7.686708062927435, 378.1735837174362, 163.66751053485015, 356.5303364961009, 
152.04110156830546, 249.4636462263379, 108.72182602253869, 3.2717859681580355], 
"eval_len": [78, 136, 36, 1000, 419, 1000, 318, 1000, 265, 28]}

 63%|██████▎   | 629997/1000000 [9:15:30<3:59:00, 25.80it/s]global step 630000, trans_decision ep_re 246.04631118580755

{"global_step": 630000, "eval_re": [298.2071171626816, 310.2887887612491, 
322.389988171541, 156.81140135288715, 342.5600748375196, 190.0353465802313, 
155.6238803379183, 119.4886715634057, 228.59838606274957, 336.4594570278922], 
"eval_len": [1000, 1000, 822, 317, 1000, 494, 293, 199, 1000, 1000]}

 64%|██████▍   | 639997/1000000 [9:24:30<3:53:34, 25.69it/s]global step 640000, trans_decision ep_re 194.65742562226913

{"global_step": 640000, "eval_re": [14.439720552558438, 18.096638785700577, 
220.4431792580272, 148.0571454256281, 319.3414987462494, 265.9104993095972, 
498.5301681867278, 198.56004290806143, 200.7831919468622, 62.41217110327873], 
"eval_len": [33, 34, 1000, 342, 731, 1000, 1000, 494, 1000, 101]}

 65%|██████▍   | 649999/1000000 [9:33:30<3:45:43, 25.84it/s]global step 650000, trans_decision ep_re 175.70516953206712

{"global_step": 650000, "eval_re": [14.084091430071176, -1.087161435047614, 
546.0229555713921, 32.30568038580864, -8.755653535887042, 467.0233116626773, 
119.08290339745808, 307.4764955333777, 41.90718273638991, 238.99188957443116], 
"eval_len": [19, 28, 1000, 153, 48, 1000, 173, 543, 194, 1000]}

 66%|██████▌   | 659999/1000000 [9:42:20<3:39:40, 25.80it/s]global step 660000, trans_decision ep_re 217.79104680608597

{"global_step": 660000, "eval_re": [284.86471960356994, 392.0780568033906, 
236.84492409756723, 212.34099174738986, 68.54656528344223, 279.90465539393733, 
122.07250115410493, 200.2863610165049, 275.8519601111883, 105.11973284976443], 
"eval_len": [630, 1000, 512, 310, 151, 562, 216, 535, 1000, 267]}

 67%|██████▋   | 669999/1000000 [9:51:10<3:34:37, 25.63it/s]global step 670000, trans_decision ep_re 153.81993961228923

{"global_step": 670000, "eval_re": [258.5967936553522, 501.8667855680829, 
72.83672515661509, 224.14264210654235, 83.52601220908126, 11.51564835152279, 
85.9341560758765, 51.851665092889064, 2.6530031775509784, 245.27596472937924], 
"eval_len": [1000, 1000, 119, 1000, 166, 70, 129, 121, 38, 464]}

 68%|██████▊   | 679998/1000000 [10:00:00<3:24:46, 26.05it/s]global step 680000, trans_decision ep_re 174.9090708622545

{"global_step": 680000, "eval_re": [240.3014507276103, 482.8038794013778, 
300.5895904171346, 153.57257267928716, 67.30607794140381, 155.80672623844345, 
4.280410098084182, 262.04025358846656, 22.821008269780815, 59.56873926095614], 
"eval_len": [700, 939, 1000, 306, 109, 647, 104, 1000, 24, 177]}

 69%|██████▉   | 689999/1000000 [10:09:00<3:21:52, 25.59it/s]global step 690000, trans_decision ep_re 142.5661053199706

{"global_step": 690000, "eval_re": [3.3956976804016286, 10.052003591701828, 
10.719130585728013, 35.01220014493088, 305.5623865337153, 364.7686878123566, 
18.670602206667667, 281.98755168207833, 117.60388567770582, 277.88890728442], 
"eval_len": [48, 20, 34, 89, 689, 705, 81, 962, 176, 560]}

 70%|██████▉   | 699997/1000000 [10:17:40<3:15:19, 25.60it/s]global step 700000, trans_decision ep_re 175.27664463270017

{"global_step": 700000, "eval_re": [156.98009659405162, 221.68050390009313, 
126.89573043727201, 63.11537004220992, 36.955638469650005, 233.52825259312436, 
281.561358187979, 60.16286963552124, 382.5028340827152, 189.38379238438517], 
"eval_len": [302, 410, 238, 136, 89, 1000, 1000, 154, 1000, 338]}

 71%|███████   | 709999/1000000 [10:26:40<3:08:49, 25.60it/s]global step 710000, trans_decision ep_re 89.20509858897914

{"global_step": 710000, "eval_re": [12.804388463479869, 25.475538093838896, 
18.549426572733083, 63.137141085459525, 46.70225731757548, 66.33536638115055, 
95.43064493798505, 181.3066760806171, 120.2995957512352, 262.00995120571656], 
"eval_len": [40, 38, 64, 128, 275, 137, 282, 349, 389, 1000]}

 72%|███████▏  | 719998/1000000 [10:35:20<2:58:45, 26.11it/s]global step 720000, trans_decision ep_re 137.01160531703377

{"global_step": 720000, "eval_re": [171.01957047713358, 9.610602692875675, 
182.04339088438715, 45.44633481966353, 310.68137224501066, 145.68085525042557, 
68.08852173645771, 202.81656246993757, 228.44557966076127, 6.283262933684811], 
"eval_len": [663, 57, 1000, 95, 1000, 622, 176, 1000, 1000, 32]}

 73%|███████▎  | 729998/1000000 [10:44:20<2:54:46, 25.75it/s]global step 730000, trans_decision ep_re 216.75463257524376

{"global_step": 730000, "eval_re": [99.28242784151173, 109.95580331148847, 
231.08203815857763, 274.64274879603647, 206.8739828961113, 274.6958229229817, 
465.9692966762346, 19.982701128602063, 203.7638249942698, 281.2976790266238], 
"eval_len": [227, 288, 526, 754, 1000, 1000, 1000, 74, 1000, 1000]}

 74%|███████▍  | 739998/1000000 [10:53:20<2:48:03, 25.78it/s]global step 740000, trans_decision ep_re 151.42470662218008

{"global_step": 740000, "eval_re": [-0.7887200187038851, 125.25071690532033, 
281.29242666860586, 68.3211272887041, 93.16722067516066, 41.10600297598681, 
57.85257843677021, 462.2347153166827, 36.57188624499722, 349.23911172827707], 
"eval_len": [30, 212, 1000, 163, 157, 120, 218, 890, 91, 1000]}

 75%|███████▍  | 749998/1000000 [11:02:10<2:40:41, 25.93it/s]global step 750000, trans_decision ep_re 121.43750492210995

{"global_step": 750000, "eval_re": [24.869739801321963, 147.0238157612054, 
86.40489228396467, 35.64476396528628, 53.69366621548377, 315.35403335565337, 
68.11667977969711, 19.518323419894084, 214.25476591787165, 249.4943687207212], 
"eval_len": [49, 360, 160, 144, 115, 1000, 136, 32, 1000, 654]}

 76%|███████▌  | 759998/1000000 [11:11:00<2:33:52, 25.99it/s]global step 760000, trans_decision ep_re 198.2675918268855

{"global_step": 760000, "eval_re": [274.9937276834843, 39.67094254660473, 
148.81236967155448, 248.96695311240197, 85.57993130004378, 5.574848747585399, 
359.4433939720064, 293.85082502672117, 333.98104570383174, 191.80188050462124], 
"eval_len": [1000, 116, 409, 601, 291, 49, 715, 857, 1000, 410]}

 77%|███████▋  | 769998/1000000 [11:19:50<2:27:22, 26.01it/s]global step 770000, trans_decision ep_re 181.51997191056904

{"global_step": 770000, "eval_re": [152.4723931752994, 273.72198688694687, 
146.3406056379526, 125.11254438658581, 18.321549316484234, 216.39571420145862, 
275.6811889786681, 15.141761508150164, 320.66136289404875, 271.35061212009595], 
"eval_len": [471, 1000, 256, 578, 32, 505, 619, 74, 1000, 1000]}

 78%|███████▊  | 779998/1000000 [11:28:50<2:20:46, 26.05it/s]global step 780000, trans_decision ep_re 113.23022500477641

{"global_step": 780000, "eval_re": [19.702572840023755, 278.89389786573804, 
108.0936059848461, 89.00360858883198, 144.65805531338052, 124.36343165795921, 
-1.2338709087566855, 99.07820887429239, 227.2866856633985, 42.45605416805031], 
"eval_len": [91, 1000, 361, 489, 366, 350, 39, 246, 639, 101]}

 79%|███████▉  | 789999/1000000 [11:37:40<2:16:20, 25.67it/s]global step 790000, trans_decision ep_re 77.53403089316745

{"global_step": 790000, "eval_re": [33.97930756815899, 243.91815494226373, 
123.64646084805975, -514.1344850074684, 190.62036180114586, 229.66795971271628, 
214.253678354434, 5.662564523350458, 234.12532267433613, 13.600983514677726], 
"eval_len": [87, 1000, 718, 1000, 617, 1000, 605, 94, 1000, 60]}

 80%|███████▉  | 799999/1000000 [11:46:30<2:10:18, 25.58it/s]global step 800000, trans_decision ep_re 161.00133568891684

{"global_step": 800000, "eval_re": [218.29999195720214, 54.33959617336557, 
22.250674937885133, 57.50462000568633, 9.757492952173504, 371.1347976794736, 
329.55033596845385, 9.328705098927358, 236.77891581539436, 301.0682263006065], 
"eval_len": [465, 568, 33, 155, 55, 789, 1000, 12, 1000, 903]}

 81%|████████  | 809997/1000000 [11:55:30<2:03:21, 25.67it/s]global step 810000, trans_decision ep_re 165.51433013324007

{"global_step": 810000, "eval_re": [99.39746646075487, 173.00768690213226, 
162.19621283791108, 300.76101848332166, 60.88577371887557, 16.283295879346184, 
146.38167917892352, 100.08729477972884, 277.34374316815337, 318.7991299232531], 
"eval_len": [295, 624, 317, 761, 207, 35, 458, 325, 1000, 1000]}

 82%|████████▏ | 819998/1000000 [12:04:20<1:55:01, 26.08it/s]global step 820000, trans_decision ep_re 8.895198920664564

{"global_step": 820000, "eval_re": [107.79108470419969, -212.07924248637917, 
286.94551768897816, -436.6821099187795, -450.03650686682886, 55.19703534062826, 
330.0863136148635, 172.47170788304913, 185.87385216742388, 49.38433707949053], 
"eval_len": [943, 563, 1000, 1000, 1000, 147, 1000, 796, 1000, 81]}

 83%|████████▎ | 829999/1000000 [12:13:20<1:52:13, 25.25it/s]global step 830000, trans_decision ep_re 195.05480001272153

{"global_step": 830000, "eval_re": [81.9906757377525, 315.1145858820915, 
18.62747202237487, 362.8359498788197, 273.14916580876405, 244.3008112300762, 
115.58884344197057, 220.11762141378048, 12.411801731781742, 306.41107297980386],
"eval_len": [159, 1000, 30, 778, 1000, 480, 245, 490, 40, 1000]}

 84%|████████▍ | 839998/1000000 [12:22:20<1:43:08, 25.85it/s]global step 840000, trans_decision ep_re 195.05388966859846

{"global_step": 840000, "eval_re": [506.4449801668996, 235.56008998559395, 
112.11864510477484, 270.6013627676718, 179.33328886290522, 323.6402387032606, 
5.962944454811545, 334.0567850113022, -119.23159409809804, 102.05215572686296], 
"eval_len": [1000, 1000, 293, 564, 750, 558, 55, 1000, 1000, 185]}

 85%|████████▍ | 849999/1000000 [12:31:20<1:38:11, 25.46it/s]global step 850000, trans_decision ep_re 164.33188295629435

{"global_step": 850000, "eval_re": [109.46046384100381, 54.253986114220154, 
267.5783427799431, 285.8266418083409, 163.77708921762297, 222.69067250339938, 
334.886893833003, 9.918799921249432, 26.227467299946046, 168.69847224421437], 
"eval_len": [315, 272, 1000, 1000, 353, 1000, 1000, 31, 52, 493]}

 86%|████████▌ | 859999/1000000 [12:40:10<1:31:02, 25.63it/s]global step 860000, trans_decision ep_re 232.0859683581869

{"global_step": 860000, "eval_re": [148.520632743172, 44.25371797386287, 
121.4522624825385, 441.08632759516155, 257.96230328370046, 95.4243655138049, 
212.99203937529114, -4.05556003154288, 486.70392620774913, 516.5196684381312], 
"eval_len": [367, 130, 346, 1000, 621, 325, 301, 1000, 931, 1000]}

 87%|████████▋ | 869997/1000000 [12:49:10<1:24:40, 25.59it/s]global step 870000, trans_decision ep_re 188.46042398432718

{"global_step": 870000, "eval_re": [32.89132095231477, 213.3122060833677, 
70.97345824577344, 142.61808603538694, 303.24981110295244, 39.62551312923504, 
416.26175916941037, 227.41713599830555, 335.57818822109544, 102.67676090543002],
"eval_len": [103, 593, 86, 184, 1000, 104, 971, 441, 1000, 332]}

 88%|████████▊ | 879999/1000000 [12:58:00<1:17:45, 25.72it/s]global step 880000, trans_decision ep_re 237.34522909788265

{"global_step": 880000, "eval_re": [220.31552539130664, 311.28525062650164, 
301.1217274859775, -20.125305194499074, 390.4312840932472, 357.48081717991545, 
211.20511860277196, 190.97361494143024, 402.5559099481532, 8.208347904021519], 
"eval_len": [1000, 1000, 1000, 1000, 865, 1000, 419, 1000, 1000, 37]}

 89%|████████▉ | 889999/1000000 [13:07:10<1:11:29, 25.64it/s]global step 890000, trans_decision ep_re 131.25356998180945

{"global_step": 890000, "eval_re": [9.500933656890174, 354.7617814288366, 
17.845342609922728, 102.08102476494497, 66.0982915480656, 79.2990149532654, 
22.463945383413876, 288.0357885223816, 185.55780604695073, 186.89177090342292], 
"eval_len": [11, 762, 36, 403, 161, 112, 23, 520, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [13:16:00<1:04:44, 25.74it/s]global step 900000, trans_decision ep_re 169.0749711162377

{"global_step": 900000, "eval_re": [171.32574595576986, 47.86848490798455, 
25.185779048465594, 105.97554727062264, 54.90848949211338, 435.1481954000831, 
374.1124173718287, 216.25527120819314, 235.12666248179863, 24.843118025517565], 
"eval_len": [1000, 92, 55, 242, 179, 850, 879, 509, 793, 82]}

 91%|█████████ | 909998/1000000 [13:24:50<57:57, 25.88it/s]global step 910000, trans_decision ep_re 126.40752823908781

{"global_step": 910000, "eval_re": [34.375391316925054, 207.7373587986364, 
219.87984744321255, 24.725014358706552, 178.2779383171156, 69.54431771982802, 
116.99130115468034, 98.0458586023266, 280.59833049517357, 33.89992418427347], 
"eval_len": [165, 1000, 1000, 66, 1000, 165, 285, 148, 900, 53]}

 92%|█████████▏| 919999/1000000 [13:33:40<52:02, 25.62it/s]global step 920000, trans_decision ep_re 154.07244289353454

{"global_step": 920000, "eval_re": [251.39035165414487, 317.5848778594638, 
22.421654605953115, 0.26760530425442397, 100.86571263493737, 194.378335261632, 
114.31495302360621, 165.04956721964592, 140.94394928007756, 233.50742209163042],
"eval_len": [1000, 680, 39, 58, 178, 331, 281, 706, 329, 1000]}

 93%|█████████▎| 929999/1000000 [13:42:30<45:25, 25.69it/s]global step 930000, trans_decision ep_re 118.73360921563233

{"global_step": 930000, "eval_re": [78.15248032611974, 40.6090832777019, 
325.14525295379684, 217.24773241557108, 83.70968382129774, 63.86722843814369, 
40.91765901227543, 111.08016584427175, 28.952323417811193, 197.65448264933406], 
"eval_len": [193, 98, 943, 1000, 194, 152, 77, 289, 124, 392]}

 94%|█████████▍| 939999/1000000 [13:51:20<38:59, 25.64it/s]global step 940000, trans_decision ep_re 217.63651145164516

{"global_step": 940000, "eval_re": [323.57538359817624, 254.04923824111745, 
97.62100268729223, 84.70681569125792, 84.1845272930912, 370.4021288298402, 
-3.094939248884284, 345.83273597731505, 329.59117920047316, 289.4970422467726], 
"eval_len": [1000, 555, 244, 161, 274, 1000, 14, 1000, 752, 1000]}

 95%|█████████▍| 949998/1000000 [14:00:20<32:04, 25.99it/s]global step 950000, trans_decision ep_re 181.0384980416148

{"global_step": 950000, "eval_re": [250.33092951977412, 121.63734151028018, 
-0.18268189924652278, 21.78581483165075, 305.56653975114926, 3.783773725535995, 
438.04686670381284, 359.57637654471034, 272.537876357585, 37.30214337089593], 
"eval_len": [549, 427, 29, 31, 1000, 33, 1000, 1000, 1000, 177]}

 96%|█████████▌| 959998/1000000 [14:09:10<25:46, 25.86it/s]global step 960000, trans_decision ep_re 107.31009461489677

{"global_step": 960000, "eval_re": [200.98588692043757, -13.751608550327761, 
109.91376393923542, 33.86632510990621, 363.5061276865126, 37.441439455552036, 
273.6885550928146, 9.605274772913946, -3.4297273131589643, 61.27490903508219], 
"eval_len": [353, 1000, 204, 135, 653, 60, 1000, 47, 33, 190]}

 97%|█████████▋| 969999/1000000 [14:18:00<19:28, 25.68it/s]global step 970000, trans_decision ep_re 174.73612388085093

{"global_step": 970000, "eval_re": [310.88319734783187, 5.141812873734101, 
287.1493290694588, 296.57912920203984, 12.63893779572668, 279.61863354320025, 
28.446134430568595, 288.1337700838404, 201.07866691892298, 37.6916275431857], 
"eval_len": [1000, 1000, 767, 761, 55, 1000, 104, 1000, 469, 249]}

 98%|█████████▊| 979997/1000000 [14:27:00<13:00, 25.64it/s]global step 980000, trans_decision ep_re 189.4395184354475

{"global_step": 980000, "eval_re": [71.27493955872038, 374.45831263185164, 
328.1415664750328, 416.30467847996175, 383.99326779247224, 32.200841287342186, 
-344.6540310819578, 131.23173812510765, 138.7310819017229, 362.7127891842212], 
"eval_len": [145, 838, 1000, 1000, 1000, 205, 1000, 233, 348, 1000]}

 99%|█████████▉| 989997/1000000 [14:36:00<06:29, 25.67it/s]global step 990000, trans_decision ep_re 181.2709977181

{"global_step": 990000, "eval_re": [431.76582422242114, 403.11459514897325, 
33.69766543386809, 52.81032245323324, 330.72692060223886, -5.39937250756113, 
87.21098273034652, 20.45621329255225, 388.13714101759973, 70.18968478732808], 
"eval_len": [878, 1000, 158, 71, 1000, 34, 495, 35, 1000, 159]}

100%|█████████▉| 999998/1000000 [14:45:00<00:00, 25.10it/s]global step 1000000, trans_decision ep_re 191.5490020179299

{"global_step": 1000000, "eval_re": [267.50930575013734, 60.81555019831991, 
119.1202951008726, 89.25140045591289, 254.21663054579363, 503.3534879060377, 
82.62532282622077, 302.7007193320748, 42.46526237309477, 193.43204569083443], 
"eval_len": [1000, 153, 1000, 341, 1000, 977, 214, 1000, 69, 1000]}

100%|██████████| 1000000/1000000 [14:45:23<00:00, 18.82it/s]
