
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.0
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [05:40<13:17:35, 20.69it/s]global step 10000, trans_decision ep_re 70.03043095690103

{"global_step": 10000, "eval_re": [70.03043095690103, 70.03043095690103, 
70.03043095690103, 70.03043095690103, 70.03043095690103, 70.03043095690103, 
70.03043095690103, 70.03043095690103, 70.03043095690103, 70.03043095690103], 
"eval_len": [79, 79, 79, 79, 79, 79, 79, 79, 79, 79]}

  2%|▏         | 19998/1000000 [16:30<13:09:42, 20.68it/s]global step 20000, trans_decision ep_re 144.44698032036692

{"global_step": 20000, "eval_re": [144.44698032036692, 144.44698032036692, 
144.44698032036692, 144.44698032036692, 144.44698032036692, 144.44698032036692, 
144.44698032036692, 144.44698032036692, 144.44698032036692, 144.44698032036692],
"eval_len": [118, 118, 118, 118, 118, 118, 118, 118, 118, 118]}

  3%|▎         | 29998/1000000 [27:30<13:03:51, 20.62it/s]global step 30000, trans_decision ep_re 82.27063712007813

{"global_step": 30000, "eval_re": [82.27063712007813, 82.27063712007813, 
82.27063712007813, 82.27063712007813, 82.27063712007813, 82.27063712007813, 
82.27063712007813, 82.27063712007813, 82.27063712007813, 82.27063712007813], 
"eval_len": [140, 140, 140, 140, 140, 140, 140, 140, 140, 140]}

  4%|▍         | 39999/1000000 [38:30<12:55:54, 20.62it/s]global step 40000, trans_decision ep_re 188.24757131385428

{"global_step": 40000, "eval_re": [188.24757131385428, 188.24757131385428, 
188.24757131385428, 188.24757131385428, 188.24757131385428, 188.24757131385428, 
188.24757131385428, 188.24757131385428, 188.24757131385428, 188.24757131385428],
"eval_len": [120, 120, 120, 120, 120, 120, 120, 120, 120, 120]}

  5%|▍         | 49999/1000000 [49:20<12:55:50, 20.41it/s]global step 50000, trans_decision ep_re 132.756833052319

{"global_step": 50000, "eval_re": [132.756833052319, 132.756833052319, 
132.756833052319, 132.756833052319, 132.756833052319, 132.756833052319, 
132.756833052319, 132.756833052319, 132.756833052319, 132.756833052319], 
"eval_len": [117, 117, 117, 117, 117, 117, 117, 117, 117, 117]}

  6%|▌         | 59997/1000000 [1:00:30<12:47:44, 20.41it/s]global step 60000, trans_decision ep_re 229.614806074633

{"global_step": 60000, "eval_re": [229.61480607463298, 229.61480607463298, 
229.61480607463298, 229.61480607463298, 229.61480607463298, 229.61480607463298, 
229.61480607463298, 229.61480607463298, 229.61480607463298, 229.61480607463298],
"eval_len": [132, 132, 132, 132, 132, 132, 132, 132, 132, 132]}

  7%|▋         | 69999/1000000 [1:11:30<12:34:30, 20.54it/s]global step 70000, trans_decision ep_re 269.6496198477406

{"global_step": 70000, "eval_re": [269.6496198477406, 269.6496198477406, 
269.6496198477406, 269.6496198477406, 269.6496198477406, 269.6496198477406, 
269.6496198477406, 269.6496198477406, 269.6496198477406, 269.6496198477406], 
"eval_len": [159, 159, 159, 159, 159, 159, 159, 159, 159, 159]}

  8%|▊         | 79998/1000000 [1:22:20<12:16:36, 20.82it/s]global step 80000, trans_decision ep_re 367.731728924613

{"global_step": 80000, "eval_re": [367.731728924613, 367.731728924613, 
367.731728924613, 367.731728924613, 367.731728924613, 367.731728924613, 
367.731728924613, 367.731728924613, 367.731728924613, 367.731728924613], 
"eval_len": [160, 160, 160, 160, 160, 160, 160, 160, 160, 160]}

  9%|▉         | 89999/1000000 [1:33:20<12:05:40, 20.90it/s]global step 90000, trans_decision ep_re 130.502703164442

{"global_step": 90000, "eval_re": [130.502703164442, 130.502703164442, 
130.502703164442, 130.502703164442, 130.502703164442, 130.502703164442, 
130.502703164442, 130.502703164442, 130.502703164442, 130.502703164442], 
"eval_len": [98, 98, 98, 98, 98, 98, 98, 98, 98, 98]}

 10%|▉         | 99999/1000000 [1:44:10<12:02:18, 20.77it/s]global step 100000, trans_decision ep_re 318.329241370543

{"global_step": 100000, "eval_re": [318.329241370543, 318.329241370543, 
318.329241370543, 318.329241370543, 318.329241370543, 318.329241370543, 
318.329241370543, 318.329241370543, 318.329241370543, 318.329241370543], 
"eval_len": [150, 150, 150, 150, 150, 150, 150, 150, 150, 150]}

 11%|█         | 109998/1000000 [1:55:00<11:42:26, 21.12it/s]global step 110000, trans_decision ep_re 59.622604464651054

{"global_step": 110000, "eval_re": [59.622604464651054, 59.622604464651054, 
59.622604464651054, 59.622604464651054, 59.622604464651054, 59.622604464651054, 
59.622604464651054, 59.622604464651054, 59.622604464651054, 59.622604464651054],
"eval_len": [59, 59, 59, 59, 59, 59, 59, 59, 59, 59]}

 12%|█▏        | 119999/1000000 [2:05:50<11:38:21, 21.00it/s]global step 120000, trans_decision ep_re 387.47125018938516

{"global_step": 120000, "eval_re": [387.4712501893852, 387.4712501893852, 
387.4712501893852, 387.4712501893852, 387.4712501893852, 387.4712501893852, 
387.4712501893852, 387.4712501893852, 387.4712501893852, 387.4712501893852], 
"eval_len": [156, 156, 156, 156, 156, 156, 156, 156, 156, 156]}

 13%|█▎        | 129999/1000000 [2:16:40<11:34:35, 20.88it/s]global step 130000, trans_decision ep_re 179.21936795209416

{"global_step": 130000, "eval_re": [179.21936795209416, 179.21936795209416, 
179.21936795209416, 179.21936795209416, 179.21936795209416, 179.21936795209416, 
179.21936795209416, 179.21936795209416, 179.21936795209416, 179.21936795209416],
"eval_len": [104, 104, 104, 104, 104, 104, 104, 104, 104, 104]}

 14%|█▍        | 139999/1000000 [2:27:30<11:27:02, 20.86it/s]global step 140000, trans_decision ep_re 161.3921597367368

{"global_step": 140000, "eval_re": [161.39215973673677, 161.39215973673677, 
161.39215973673677, 161.39215973673677, 161.39215973673677, 161.39215973673677, 
161.39215973673677, 161.39215973673677, 161.39215973673677, 161.39215973673677],
"eval_len": [145, 145, 145, 145, 145, 145, 145, 145, 145, 145]}

 15%|█▍        | 149999/1000000 [2:38:20<11:15:32, 20.97it/s]global step 150000, trans_decision ep_re 476.18320758468656

{"global_step": 150000, "eval_re": [476.18320758468644, 476.18320758468644, 
476.18320758468644, 476.18320758468644, 476.18320758468644, 476.18320758468644, 
476.18320758468644, 476.18320758468644, 476.18320758468644, 476.18320758468644],
"eval_len": [182, 182, 182, 182, 182, 182, 182, 182, 182, 182]}

 16%|█▌        | 159999/1000000 [2:49:10<11:08:45, 20.93it/s]global step 160000, trans_decision ep_re 140.51958426948104

{"global_step": 160000, "eval_re": [140.51958426948102, 140.51958426948102, 
140.51958426948102, 140.51958426948102, 140.51958426948102, 140.51958426948102, 
140.51958426948102, 140.51958426948102, 140.51958426948102, 140.51958426948102],
"eval_len": [92, 92, 92, 92, 92, 92, 92, 92, 92, 92]}

 17%|█▋        | 169999/1000000 [2:59:50<10:58:22, 21.01it/s]global step 170000, trans_decision ep_re 665.1032626208987

{"global_step": 170000, "eval_re": [665.1032626208987, 665.1032626208987, 
665.1032626208987, 665.1032626208987, 665.1032626208987, 665.1032626208987, 
665.1032626208987, 665.1032626208987, 665.1032626208987, 665.1032626208987], 
"eval_len": [212, 212, 212, 212, 212, 212, 212, 212, 212, 212]}

 18%|█▊        | 179999/1000000 [3:10:50<10:54:13, 20.89it/s]global step 180000, trans_decision ep_re 219.04186683383

{"global_step": 180000, "eval_re": [219.04186683383, 219.04186683383, 
219.04186683383, 219.04186683383, 219.04186683383, 219.04186683383, 
219.04186683383, 219.04186683383, 219.04186683383, 219.04186683383], "eval_len":
[155, 155, 155, 155, 155, 155, 155, 155, 155, 155]}

 19%|█▉        | 189999/1000000 [3:21:40<10:51:59, 20.71it/s]global step 190000, trans_decision ep_re 137.99779943655955

{"global_step": 190000, "eval_re": [137.99779943655955, 137.99779943655955, 
137.99779943655955, 137.99779943655955, 137.99779943655955, 137.99779943655955, 
137.99779943655955, 137.99779943655955, 137.99779943655955, 137.99779943655955],
"eval_len": [92, 92, 92, 92, 92, 92, 92, 92, 92, 92]}

 20%|█▉        | 199999/1000000 [3:32:20<10:36:41, 20.94it/s]global step 200000, trans_decision ep_re 530.4960225394209

{"global_step": 200000, "eval_re": [530.496022539421, 530.496022539421, 
530.496022539421, 530.496022539421, 530.496022539421, 530.496022539421, 
530.496022539421, 530.496022539421, 530.496022539421, 530.496022539421], 
"eval_len": [231, 231, 231, 231, 231, 231, 231, 231, 231, 231]}

 21%|██        | 209999/1000000 [3:43:10<10:20:48, 21.21it/s]global step 210000, trans_decision ep_re 167.22916896879914

{"global_step": 210000, "eval_re": [167.22916896879917, 167.22916896879917, 
167.22916896879917, 167.22916896879917, 167.22916896879917, 167.22916896879917, 
167.22916896879917, 167.22916896879917, 167.22916896879917, 167.22916896879917],
"eval_len": [106, 106, 106, 106, 106, 106, 106, 106, 106, 106]}

 22%|██▏       | 219999/1000000 [3:54:00<10:14:03, 21.17it/s]global step 220000, trans_decision ep_re 301.14965391961243

{"global_step": 220000, "eval_re": [301.14965391961243, 301.14965391961243, 
301.14965391961243, 301.14965391961243, 301.14965391961243, 301.14965391961243, 
301.14965391961243, 301.14965391961243, 301.14965391961243, 301.14965391961243],
"eval_len": [155, 155, 155, 155, 155, 155, 155, 155, 155, 155]}

 23%|██▎       | 229999/1000000 [4:04:40<10:07:51, 21.11it/s]global step 230000, trans_decision ep_re 161.0061870799645

{"global_step": 230000, "eval_re": [161.0061870799645, 161.0061870799645, 
161.0061870799645, 161.0061870799645, 161.0061870799645, 161.0061870799645, 
161.0061870799645, 161.0061870799645, 161.0061870799645, 161.0061870799645], 
"eval_len": [101, 101, 101, 101, 101, 101, 101, 101, 101, 101]}

 24%|██▍       | 239999/1000000 [4:15:30<10:08:15, 20.82it/s]global step 240000, trans_decision ep_re 308.37390600041095

{"global_step": 240000, "eval_re": [308.37390600041095, 308.37390600041095, 
308.37390600041095, 308.37390600041095, 308.37390600041095, 308.37390600041095, 
308.37390600041095, 308.37390600041095, 308.37390600041095, 308.37390600041095],
"eval_len": [157, 157, 157, 157, 157, 157, 157, 157, 157, 157]}

 25%|██▍       | 249998/1000000 [4:26:10<9:44:07, 21.40it/s]global step 250000, trans_decision ep_re 156.873522952746

{"global_step": 250000, "eval_re": [156.873522952746, 156.873522952746, 
156.873522952746, 156.873522952746, 156.873522952746, 156.873522952746, 
156.873522952746, 156.873522952746, 156.873522952746, 156.873522952746], 
"eval_len": [99, 99, 99, 99, 99, 99, 99, 99, 99, 99]}

 26%|██▌       | 259999/1000000 [4:36:50<9:45:55, 21.05it/s]global step 260000, trans_decision ep_re 428.4889367937638

{"global_step": 260000, "eval_re": [428.48893679376374, 428.48893679376374, 
428.48893679376374, 428.48893679376374, 428.48893679376374, 428.48893679376374, 
428.48893679376374, 428.48893679376374, 428.48893679376374, 428.48893679376374],
"eval_len": [183, 183, 183, 183, 183, 183, 183, 183, 183, 183]}

 27%|██▋       | 269998/1000000 [4:47:40<9:30:37, 21.32it/s]global step 270000, trans_decision ep_re 429.479822538966

{"global_step": 270000, "eval_re": [429.479822538966, 429.479822538966, 
429.479822538966, 429.479822538966, 429.479822538966, 429.479822538966, 
429.479822538966, 429.479822538966, 429.479822538966, 429.479822538966], 
"eval_len": [211, 211, 211, 211, 211, 211, 211, 211, 211, 211]}

 28%|██▊       | 279999/1000000 [4:58:30<9:29:58, 21.05it/s]global step 280000, trans_decision ep_re 870.7554819275443

{"global_step": 280000, "eval_re": [870.7554819275442, 870.7554819275442, 
870.7554819275442, 870.7554819275442, 870.7554819275442, 870.7554819275442, 
870.7554819275442, 870.7554819275442, 870.7554819275442, 870.7554819275442], 
"eval_len": [299, 299, 299, 299, 299, 299, 299, 299, 299, 299]}

 29%|██▉       | 289999/1000000 [5:09:20<9:20:11, 21.12it/s]global step 290000, trans_decision ep_re 1036.6412495673962

{"global_step": 290000, "eval_re": [1036.6412495673965, 1036.6412495673965, 
1036.6412495673965, 1036.6412495673965, 1036.6412495673965, 1036.6412495673965, 
1036.6412495673965, 1036.6412495673965, 1036.6412495673965, 1036.6412495673965],
"eval_len": [336, 336, 336, 336, 336, 336, 336, 336, 336, 336]}

 30%|██▉       | 299999/1000000 [5:20:10<9:11:58, 21.14it/s]global step 300000, trans_decision ep_re 300.5301367324765

{"global_step": 300000, "eval_re": [300.5301367324765, 300.5301367324765, 
300.5301367324765, 300.5301367324765, 300.5301367324765, 300.5301367324765, 
300.5301367324765, 300.5301367324765, 300.5301367324765, 300.5301367324765], 
"eval_len": [148, 148, 148, 148, 148, 148, 148, 148, 148, 148]}

 31%|███       | 309999/1000000 [5:30:50<9:05:15, 21.09it/s]global step 310000, trans_decision ep_re 148.68785079153895

{"global_step": 310000, "eval_re": [148.68785079153895, 148.68785079153895, 
148.68785079153895, 148.68785079153895, 148.68785079153895, 148.68785079153895, 
148.68785079153895, 148.68785079153895, 148.68785079153895, 148.68785079153895],
"eval_len": [96, 96, 96, 96, 96, 96, 96, 96, 96, 96]}

 32%|███▏      | 319999/1000000 [5:41:40<8:57:31, 21.08it/s]global step 320000, trans_decision ep_re 1220.6704472026654

{"global_step": 320000, "eval_re": [1220.6704472026654, 1220.6704472026654, 
1220.6704472026654, 1220.6704472026654, 1220.6704472026654, 1220.6704472026654, 
1220.6704472026654, 1220.6704472026654, 1220.6704472026654, 1220.6704472026654],
"eval_len": [432, 432, 432, 432, 432, 432, 432, 432, 432, 432]}

 33%|███▎      | 329997/1000000 [5:52:30<8:50:49, 21.04it/s]global step 330000, trans_decision ep_re 65.74189565060468

{"global_step": 330000, "eval_re": [65.74189565060469, 65.74189565060469, 
65.74189565060469, 65.74189565060469, 65.74189565060469, 65.74189565060469, 
65.74189565060469, 65.74189565060469, 65.74189565060469, 65.74189565060469], 
"eval_len": [80, 80, 80, 80, 80, 80, 80, 80, 80, 80]}

 34%|███▍      | 339999/1000000 [6:03:20<8:43:18, 21.02it/s]global step 340000, trans_decision ep_re 702.7262875976114

{"global_step": 340000, "eval_re": [702.7262875976114, 702.7262875976114, 
702.7262875976114, 702.7262875976114, 702.7262875976114, 702.7262875976114, 
702.7262875976114, 702.7262875976114, 702.7262875976114, 702.7262875976114], 
"eval_len": [269, 269, 269, 269, 269, 269, 269, 269, 269, 269]}

 35%|███▍      | 349999/1000000 [6:14:00<8:34:10, 21.07it/s]global step 350000, trans_decision ep_re 400.85218019488656

{"global_step": 350000, "eval_re": [400.8521801948866, 400.8521801948866, 
400.8521801948866, 400.8521801948866, 400.8521801948866, 400.8521801948866, 
400.8521801948866, 400.8521801948866, 400.8521801948866, 400.8521801948866], 
"eval_len": [176, 176, 176, 176, 176, 176, 176, 176, 176, 176]}

 36%|███▌      | 359998/1000000 [6:24:50<8:18:37, 21.39it/s]global step 360000, trans_decision ep_re 219.92726812189966

{"global_step": 360000, "eval_re": [219.92726812189966, 219.92726812189966, 
219.92726812189966, 219.92726812189966, 219.92726812189966, 219.92726812189966, 
219.92726812189966, 219.92726812189966, 219.92726812189966, 219.92726812189966],
"eval_len": [115, 115, 115, 115, 115, 115, 115, 115, 115, 115]}

 37%|███▋      | 369999/1000000 [6:35:30<8:20:25, 20.98it/s]global step 370000, trans_decision ep_re 1779.3913600357434

{"global_step": 370000, "eval_re": [1779.3913600357434, 1779.3913600357434, 
1779.3913600357434, 1779.3913600357434, 1779.3913600357434, 1779.3913600357434, 
1779.3913600357434, 1779.3913600357434, 1779.3913600357434, 1779.3913600357434],
"eval_len": [598, 598, 598, 598, 598, 598, 598, 598, 598, 598]}

 38%|███▊      | 379997/1000000 [6:46:30<8:10:20, 21.07it/s]global step 380000, trans_decision ep_re 539.1360881432611

{"global_step": 380000, "eval_re": [539.1360881432611, 539.1360881432611, 
539.1360881432611, 539.1360881432611, 539.1360881432611, 539.1360881432611, 
539.1360881432611, 539.1360881432611, 539.1360881432611, 539.1360881432611], 
"eval_len": [200, 200, 200, 200, 200, 200, 200, 200, 200, 200]}

 39%|███▉      | 389998/1000000 [6:57:20<7:55:23, 21.39it/s]global step 390000, trans_decision ep_re 391.9269367834287

{"global_step": 390000, "eval_re": [391.92693678342874, 391.92693678342874, 
391.92693678342874, 391.92693678342874, 391.92693678342874, 391.92693678342874, 
391.92693678342874, 391.92693678342874, 391.92693678342874, 391.92693678342874],
"eval_len": [164, 164, 164, 164, 164, 164, 164, 164, 164, 164]}

 40%|███▉      | 399999/1000000 [7:08:00<7:56:35, 20.98it/s]global step 400000, trans_decision ep_re 653.1650557700269

{"global_step": 400000, "eval_re": [653.1650557700269, 653.1650557700269, 
653.1650557700269, 653.1650557700269, 653.1650557700269, 653.1650557700269, 
653.1650557700269, 653.1650557700269, 653.1650557700269, 653.1650557700269], 
"eval_len": [239, 239, 239, 239, 239, 239, 239, 239, 239, 239]}

 41%|████      | 409998/1000000 [7:18:50<7:40:26, 21.36it/s]global step 410000, trans_decision ep_re 1122.841828635625

{"global_step": 410000, "eval_re": [1122.8418286356252, 1122.8418286356252, 
1122.8418286356252, 1122.8418286356252, 1122.8418286356252, 1122.8418286356252, 
1122.8418286356252, 1122.8418286356252, 1122.8418286356252, 1122.8418286356252],
"eval_len": [334, 334, 334, 334, 334, 334, 334, 334, 334, 334]}

 42%|████▏     | 419998/1000000 [7:29:40<7:34:20, 21.28it/s]global step 420000, trans_decision ep_re 339.87942806209406

{"global_step": 420000, "eval_re": [339.87942806209406, 339.87942806209406, 
339.87942806209406, 339.87942806209406, 339.87942806209406, 339.87942806209406, 
339.87942806209406, 339.87942806209406, 339.87942806209406, 339.87942806209406],
"eval_len": [152, 152, 152, 152, 152, 152, 152, 152, 152, 152]}

 43%|████▎     | 429999/1000000 [7:40:30<7:30:52, 21.07it/s]global step 430000, trans_decision ep_re 164.5582188922922

{"global_step": 430000, "eval_re": [164.5582188922922, 164.5582188922922, 
164.5582188922922, 164.5582188922922, 164.5582188922922, 164.5582188922922, 
164.5582188922922, 164.5582188922922, 164.5582188922922, 164.5582188922922], 
"eval_len": [98, 98, 98, 98, 98, 98, 98, 98, 98, 98]}

 44%|████▍     | 439999/1000000 [7:51:10<7:23:13, 21.06it/s]global step 440000, trans_decision ep_re 459.3728066976695

{"global_step": 440000, "eval_re": [459.3728066976696, 459.3728066976696, 
459.3728066976696, 459.3728066976696, 459.3728066976696, 459.3728066976696, 
459.3728066976696, 459.3728066976696, 459.3728066976696, 459.3728066976696], 
"eval_len": [183, 183, 183, 183, 183, 183, 183, 183, 183, 183]}

 45%|████▍     | 449999/1000000 [8:01:50<7:14:40, 21.09it/s]global step 450000, trans_decision ep_re 343.92081075756795

{"global_step": 450000, "eval_re": [343.920810757568, 343.920810757568, 
343.920810757568, 343.920810757568, 343.920810757568, 343.920810757568, 
343.920810757568, 343.920810757568, 343.920810757568, 343.920810757568], 
"eval_len": [190, 190, 190, 190, 190, 190, 190, 190, 190, 190]}

 46%|████▌     | 459998/1000000 [8:12:40<7:03:20, 21.26it/s]global step 460000, trans_decision ep_re 737.4746233666284

{"global_step": 460000, "eval_re": [737.4746233666284, 737.4746233666284, 
737.4746233666284, 737.4746233666284, 737.4746233666284, 737.4746233666284, 
737.4746233666284, 737.4746233666284, 737.4746233666284, 737.4746233666284], 
"eval_len": [227, 227, 227, 227, 227, 227, 227, 227, 227, 227]}

 47%|████▋     | 469999/1000000 [8:23:30<7:00:11, 21.02it/s]global step 470000, trans_decision ep_re 398.66950822021056

{"global_step": 470000, "eval_re": [398.6695082202106, 398.6695082202106, 
398.6695082202106, 398.6695082202106, 398.6695082202106, 398.6695082202106, 
398.6695082202106, 398.6695082202106, 398.6695082202106, 398.6695082202106], 
"eval_len": [168, 168, 168, 168, 168, 168, 168, 168, 168, 168]}

 48%|████▊     | 479998/1000000 [8:34:10<6:48:14, 21.23it/s]global step 480000, trans_decision ep_re 732.3888084924922

{"global_step": 480000, "eval_re": [732.3888084924923, 732.3888084924923, 
732.3888084924923, 732.3888084924923, 732.3888084924923, 732.3888084924923, 
732.3888084924923, 732.3888084924923, 732.3888084924923, 732.3888084924923], 
"eval_len": [230, 230, 230, 230, 230, 230, 230, 230, 230, 230]}

 49%|████▉     | 489999/1000000 [8:45:00<6:43:04, 21.09it/s]global step 490000, trans_decision ep_re 146.6288211439956

{"global_step": 490000, "eval_re": [146.62882114399562, 146.62882114399562, 
146.62882114399562, 146.62882114399562, 146.62882114399562, 146.62882114399562, 
146.62882114399562, 146.62882114399562, 146.62882114399562, 146.62882114399562],
"eval_len": [94, 94, 94, 94, 94, 94, 94, 94, 94, 94]}

 50%|████▉     | 499998/1000000 [8:55:40<6:32:33, 21.23it/s]global step 500000, trans_decision ep_re 830.3351892265298

{"global_step": 500000, "eval_re": [830.3351892265298, 830.3351892265298, 
830.3351892265298, 830.3351892265298, 830.3351892265298, 830.3351892265298, 
830.3351892265298, 830.3351892265298, 830.3351892265298, 830.3351892265298], 
"eval_len": [265, 265, 265, 265, 265, 265, 265, 265, 265, 265]}

 51%|█████     | 509999/1000000 [9:06:30<6:28:55, 21.00it/s]global step 510000, trans_decision ep_re 813.4860186291963

{"global_step": 510000, "eval_re": [813.4860186291963, 813.4860186291963, 
813.4860186291963, 813.4860186291963, 813.4860186291963, 813.4860186291963, 
813.4860186291963, 813.4860186291963, 813.4860186291963, 813.4860186291963], 
"eval_len": [260, 260, 260, 260, 260, 260, 260, 260, 260, 260]}

 52%|█████▏    | 519999/1000000 [9:17:10<6:17:04, 21.22it/s]global step 520000, trans_decision ep_re 146.23080331435318

{"global_step": 520000, "eval_re": [146.23080331435318, 146.23080331435318, 
146.23080331435318, 146.23080331435318, 146.23080331435318, 146.23080331435318, 
146.23080331435318, 146.23080331435318, 146.23080331435318, 146.23080331435318],
"eval_len": [96, 96, 96, 96, 96, 96, 96, 96, 96, 96]}

 53%|█████▎    | 529999/1000000 [9:28:00<6:10:49, 21.12it/s]global step 530000, trans_decision ep_re 448.44146526059995

{"global_step": 530000, "eval_re": [448.4414652605999, 448.4414652605999, 
448.4414652605999, 448.4414652605999, 448.4414652605999, 448.4414652605999, 
448.4414652605999, 448.4414652605999, 448.4414652605999, 448.4414652605999], 
"eval_len": [174, 174, 174, 174, 174, 174, 174, 174, 174, 174]}

 54%|█████▍    | 539998/1000000 [9:38:40<6:00:50, 21.25it/s]global step 540000, trans_decision ep_re 810.2540334788009

{"global_step": 540000, "eval_re": [810.2540334788009, 810.2540334788009, 
810.2540334788009, 810.2540334788009, 810.2540334788009, 810.2540334788009, 
810.2540334788009, 810.2540334788009, 810.2540334788009, 810.2540334788009], 
"eval_len": [279, 279, 279, 279, 279, 279, 279, 279, 279, 279]}

 55%|█████▍    | 549998/1000000 [9:49:30<5:51:16, 21.35it/s]global step 550000, trans_decision ep_re 648.050993663176

{"global_step": 550000, "eval_re": [648.050993663176, 648.050993663176, 
648.050993663176, 648.050993663176, 648.050993663176, 648.050993663176, 
648.050993663176, 648.050993663176, 648.050993663176, 648.050993663176], 
"eval_len": [251, 251, 251, 251, 251, 251, 251, 251, 251, 251]}

 56%|█████▌    | 559998/1000000 [10:00:20<5:44:06, 21.31it/s]global step 560000, trans_decision ep_re 473.3538682007169

{"global_step": 560000, "eval_re": [473.35386820071693, 473.35386820071693, 
473.35386820071693, 473.35386820071693, 473.35386820071693, 473.35386820071693, 
473.35386820071693, 473.35386820071693, 473.35386820071693, 473.35386820071693],
"eval_len": [180, 180, 180, 180, 180, 180, 180, 180, 180, 180]}

 57%|█████▋    | 569999/1000000 [10:11:10<5:39:33, 21.11it/s]global step 570000, trans_decision ep_re 436.7896310694179

{"global_step": 570000, "eval_re": [436.7896310694179, 436.7896310694179, 
436.7896310694179, 436.7896310694179, 436.7896310694179, 436.7896310694179, 
436.7896310694179, 436.7896310694179, 436.7896310694179, 436.7896310694179], 
"eval_len": [175, 175, 175, 175, 175, 175, 175, 175, 175, 175]}

 58%|█████▊    | 579998/1000000 [10:21:50<5:27:52, 21.35it/s]global step 580000, trans_decision ep_re 1044.7862875589294

{"global_step": 580000, "eval_re": [1044.7862875589294, 1044.7862875589294, 
1044.7862875589294, 1044.7862875589294, 1044.7862875589294, 1044.7862875589294, 
1044.7862875589294, 1044.7862875589294, 1044.7862875589294, 1044.7862875589294],
"eval_len": [345, 345, 345, 345, 345, 345, 345, 345, 345, 345]}

 59%|█████▉    | 589999/1000000 [10:32:40<5:23:22, 21.13it/s]global step 590000, trans_decision ep_re 979.3172578728261

{"global_step": 590000, "eval_re": [979.3172578728261, 979.3172578728261, 
979.3172578728261, 979.3172578728261, 979.3172578728261, 979.3172578728261, 
979.3172578728261, 979.3172578728261, 979.3172578728261, 979.3172578728261], 
"eval_len": [323, 323, 323, 323, 323, 323, 323, 323, 323, 323]}

 60%|█████▉    | 599999/1000000 [10:43:30<5:16:04, 21.09it/s]global step 600000, trans_decision ep_re 776.2807260479342

{"global_step": 600000, "eval_re": [776.2807260479342, 776.2807260479342, 
776.2807260479342, 776.2807260479342, 776.2807260479342, 776.2807260479342, 
776.2807260479342, 776.2807260479342, 776.2807260479342, 776.2807260479342], 
"eval_len": [259, 259, 259, 259, 259, 259, 259, 259, 259, 259]}

 61%|██████    | 609999/1000000 [10:54:20<5:09:31, 21.00it/s]global step 610000, trans_decision ep_re 487.2286321127459

{"global_step": 610000, "eval_re": [487.2286321127459, 487.2286321127459, 
487.2286321127459, 487.2286321127459, 487.2286321127459, 487.2286321127459, 
487.2286321127459, 487.2286321127459, 487.2286321127459, 487.2286321127459], 
"eval_len": [189, 189, 189, 189, 189, 189, 189, 189, 189, 189]}

 62%|██████▏   | 619998/1000000 [11:05:10<4:57:37, 21.28it/s]global step 620000, trans_decision ep_re 245.5515120447788

{"global_step": 620000, "eval_re": [245.55151204477875, 245.55151204477875, 
245.55151204477875, 245.55151204477875, 245.55151204477875, 245.55151204477875, 
245.55151204477875, 245.55151204477875, 245.55151204477875, 245.55151204477875],
"eval_len": [154, 154, 154, 154, 154, 154, 154, 154, 154, 154]}

 63%|██████▎   | 629999/1000000 [11:16:00<4:52:17, 21.10it/s]global step 630000, trans_decision ep_re 274.64781905700227

{"global_step": 630000, "eval_re": [274.64781905700227, 274.64781905700227, 
274.64781905700227, 274.64781905700227, 274.64781905700227, 274.64781905700227, 
274.64781905700227, 274.64781905700227, 274.64781905700227, 274.64781905700227],
"eval_len": [144, 144, 144, 144, 144, 144, 144, 144, 144, 144]}

 64%|██████▍   | 639998/1000000 [11:26:40<4:42:15, 21.26it/s]global step 640000, trans_decision ep_re 267.16089645761724

{"global_step": 640000, "eval_re": [267.16089645761724, 267.16089645761724, 
267.16089645761724, 267.16089645761724, 267.16089645761724, 267.16089645761724, 
267.16089645761724, 267.16089645761724, 267.16089645761724, 267.16089645761724],
"eval_len": [151, 151, 151, 151, 151, 151, 151, 151, 151, 151]}

 65%|██████▍   | 649999/1000000 [11:37:30<4:37:27, 21.02it/s]global step 650000, trans_decision ep_re 392.0705343877999

{"global_step": 650000, "eval_re": [392.0705343877999, 392.0705343877999, 
392.0705343877999, 392.0705343877999, 392.0705343877999, 392.0705343877999, 
392.0705343877999, 392.0705343877999, 392.0705343877999, 392.0705343877999], 
"eval_len": [190, 190, 190, 190, 190, 190, 190, 190, 190, 190]}

 66%|██████▌   | 659999/1000000 [11:48:10<4:29:15, 21.04it/s]global step 660000, trans_decision ep_re 395.4892652247272

{"global_step": 660000, "eval_re": [395.4892652247272, 395.4892652247272, 
395.4892652247272, 395.4892652247272, 395.4892652247272, 395.4892652247272, 
395.4892652247272, 395.4892652247272, 395.4892652247272, 395.4892652247272], 
"eval_len": [186, 186, 186, 186, 186, 186, 186, 186, 186, 186]}

 67%|██████▋   | 669998/1000000 [11:59:00<4:18:54, 21.24it/s]global step 670000, trans_decision ep_re 663.8818665086803

{"global_step": 670000, "eval_re": [663.8818665086804, 663.8818665086804, 
663.8818665086804, 663.8818665086804, 663.8818665086804, 663.8818665086804, 
663.8818665086804, 663.8818665086804, 663.8818665086804, 663.8818665086804], 
"eval_len": [235, 235, 235, 235, 235, 235, 235, 235, 235, 235]}

 68%|██████▊   | 679999/1000000 [12:09:50<4:12:55, 21.09it/s]global step 680000, trans_decision ep_re 394.15534050838403

{"global_step": 680000, "eval_re": [394.15534050838403, 394.15534050838403, 
394.15534050838403, 394.15534050838403, 394.15534050838403, 394.15534050838403, 
394.15534050838403, 394.15534050838403, 394.15534050838403, 394.15534050838403],
"eval_len": [205, 205, 205, 205, 205, 205, 205, 205, 205, 205]}

 69%|██████▉   | 689999/1000000 [12:20:30<4:05:03, 21.08it/s]global step 690000, trans_decision ep_re 514.8192691156036

{"global_step": 690000, "eval_re": [514.8192691156036, 514.8192691156036, 
514.8192691156036, 514.8192691156036, 514.8192691156036, 514.8192691156036, 
514.8192691156036, 514.8192691156036, 514.8192691156036, 514.8192691156036], 
"eval_len": [192, 192, 192, 192, 192, 192, 192, 192, 192, 192]}

 70%|██████▉   | 699998/1000000 [12:31:20<3:55:34, 21.22it/s]global step 700000, trans_decision ep_re 808.8031746031942

{"global_step": 700000, "eval_re": [808.8031746031943, 808.8031746031943, 
808.8031746031943, 808.8031746031943, 808.8031746031943, 808.8031746031943, 
808.8031746031943, 808.8031746031943, 808.8031746031943, 808.8031746031943], 
"eval_len": [274, 274, 274, 274, 274, 274, 274, 274, 274, 274]}

 71%|███████   | 709998/1000000 [12:42:10<3:47:42, 21.23it/s]global step 710000, trans_decision ep_re 371.15872788302556

{"global_step": 710000, "eval_re": [371.1587278830255, 371.1587278830255, 
371.1587278830255, 371.1587278830255, 371.1587278830255, 371.1587278830255, 
371.1587278830255, 371.1587278830255, 371.1587278830255, 371.1587278830255], 
"eval_len": [156, 156, 156, 156, 156, 156, 156, 156, 156, 156]}

 72%|███████▏  | 719999/1000000 [12:52:50<3:40:53, 21.13it/s]global step 720000, trans_decision ep_re 741.8567768470368

{"global_step": 720000, "eval_re": [741.8567768470368, 741.8567768470368, 
741.8567768470368, 741.8567768470368, 741.8567768470368, 741.8567768470368, 
741.8567768470368, 741.8567768470368, 741.8567768470368, 741.8567768470368], 
"eval_len": [237, 237, 237, 237, 237, 237, 237, 237, 237, 237]}

 73%|███████▎  | 729998/1000000 [13:03:40<3:32:13, 21.20it/s]global step 730000, trans_decision ep_re 155.39348069380793

{"global_step": 730000, "eval_re": [155.39348069380793, 155.39348069380793, 
155.39348069380793, 155.39348069380793, 155.39348069380793, 155.39348069380793, 
155.39348069380793, 155.39348069380793, 155.39348069380793, 155.39348069380793],
"eval_len": [195, 195, 195, 195, 195, 195, 195, 195, 195, 195]}

 74%|███████▍  | 739999/1000000 [13:14:30<3:25:10, 21.12it/s]global step 740000, trans_decision ep_re 180.48304959979694

{"global_step": 740000, "eval_re": [180.48304959979694, 180.48304959979694, 
180.48304959979694, 180.48304959979694, 180.48304959979694, 180.48304959979694, 
180.48304959979694, 180.48304959979694, 180.48304959979694, 180.48304959979694],
"eval_len": [283, 283, 283, 283, 283, 283, 283, 283, 283, 283]}

 75%|███████▍  | 749999/1000000 [13:25:20<3:16:49, 21.17it/s]global step 750000, trans_decision ep_re 926.3201254949365

{"global_step": 750000, "eval_re": [926.3201254949364, 926.3201254949364, 
926.3201254949364, 926.3201254949364, 926.3201254949364, 926.3201254949364, 
926.3201254949364, 926.3201254949364, 926.3201254949364, 926.3201254949364], 
"eval_len": [310, 310, 310, 310, 310, 310, 310, 310, 310, 310]}

 76%|███████▌  | 759999/1000000 [13:36:10<3:09:42, 21.09it/s]global step 760000, trans_decision ep_re 235.4246548609778

{"global_step": 760000, "eval_re": [235.42465486097782, 235.42465486097782, 
235.42465486097782, 235.42465486097782, 235.42465486097782, 235.42465486097782, 
235.42465486097782, 235.42465486097782, 235.42465486097782, 235.42465486097782],
"eval_len": [134, 134, 134, 134, 134, 134, 134, 134, 134, 134]}

 77%|███████▋  | 769998/1000000 [13:46:50<3:00:39, 21.22it/s]global step 770000, trans_decision ep_re 348.83800338822783

{"global_step": 770000, "eval_re": [348.8380033882279, 348.8380033882279, 
348.8380033882279, 348.8380033882279, 348.8380033882279, 348.8380033882279, 
348.8380033882279, 348.8380033882279, 348.8380033882279, 348.8380033882279], 
"eval_len": [183, 183, 183, 183, 183, 183, 183, 183, 183, 183]}

 78%|███████▊  | 779999/1000000 [13:57:40<2:53:46, 21.10it/s]global step 780000, trans_decision ep_re 700.8812034849482

{"global_step": 780000, "eval_re": [700.8812034849483, 700.8812034849483, 
700.8812034849483, 700.8812034849483, 700.8812034849483, 700.8812034849483, 
700.8812034849483, 700.8812034849483, 700.8812034849483, 700.8812034849483], 
"eval_len": [235, 235, 235, 235, 235, 235, 235, 235, 235, 235]}

 79%|███████▉  | 789999/1000000 [14:08:20<2:46:40, 21.00it/s]global step 790000, trans_decision ep_re 643.8331905585368

{"global_step": 790000, "eval_re": [643.8331905585369, 643.8331905585369, 
643.8331905585369, 643.8331905585369, 643.8331905585369, 643.8331905585369, 
643.8331905585369, 643.8331905585369, 643.8331905585369, 643.8331905585369], 
"eval_len": [229, 229, 229, 229, 229, 229, 229, 229, 229, 229]}

 80%|███████▉  | 799999/1000000 [14:19:10<2:38:25, 21.04it/s]global step 800000, trans_decision ep_re 734.1634344842402

{"global_step": 800000, "eval_re": [734.16343448424, 734.16343448424, 
734.16343448424, 734.16343448424, 734.16343448424, 734.16343448424, 
734.16343448424, 734.16343448424, 734.16343448424, 734.16343448424], "eval_len":
[271, 271, 271, 271, 271, 271, 271, 271, 271, 271]}

 81%|████████  | 809998/1000000 [14:30:00<2:28:26, 21.33it/s]global step 810000, trans_decision ep_re 618.2203193441298

{"global_step": 810000, "eval_re": [618.2203193441297, 618.2203193441297, 
618.2203193441297, 618.2203193441297, 618.2203193441297, 618.2203193441297, 
618.2203193441297, 618.2203193441297, 618.2203193441297, 618.2203193441297], 
"eval_len": [210, 210, 210, 210, 210, 210, 210, 210, 210, 210]}

 82%|████████▏ | 819999/1000000 [14:40:50<2:21:40, 21.18it/s]global step 820000, trans_decision ep_re 1186.995186385498

{"global_step": 820000, "eval_re": [1186.9951863854978, 1186.9951863854978, 
1186.9951863854978, 1186.9951863854978, 1186.9951863854978, 1186.9951863854978, 
1186.9951863854978, 1186.9951863854978, 1186.9951863854978, 1186.9951863854978],
"eval_len": [369, 369, 369, 369, 369, 369, 369, 369, 369, 369]}

 83%|████████▎ | 829999/1000000 [14:51:40<2:13:20, 21.25it/s]global step 830000, trans_decision ep_re 652.4058422857115

{"global_step": 830000, "eval_re": [652.4058422857115, 652.4058422857115, 
652.4058422857115, 652.4058422857115, 652.4058422857115, 652.4058422857115, 
652.4058422857115, 652.4058422857115, 652.4058422857115, 652.4058422857115], 
"eval_len": [239, 239, 239, 239, 239, 239, 239, 239, 239, 239]}

 84%|████████▍ | 839999/1000000 [15:02:20<2:06:37, 21.06it/s]global step 840000, trans_decision ep_re 748.0098327155781

{"global_step": 840000, "eval_re": [748.009832715578, 748.009832715578, 
748.009832715578, 748.009832715578, 748.009832715578, 748.009832715578, 
748.009832715578, 748.009832715578, 748.009832715578, 748.009832715578], 
"eval_len": [234, 234, 234, 234, 234, 234, 234, 234, 234, 234]}

 85%|████████▍ | 849999/1000000 [15:13:11<1:58:04, 21.17it/s]global step 850000, trans_decision ep_re 711.0828573414203

{"global_step": 850000, "eval_re": [711.0828573414204, 711.0828573414204, 
711.0828573414204, 711.0828573414204, 711.0828573414204, 711.0828573414204, 
711.0828573414204, 711.0828573414204, 711.0828573414204, 711.0828573414204], 
"eval_len": [237, 237, 237, 237, 237, 237, 237, 237, 237, 237]}

 86%|████████▌ | 859998/1000000 [15:24:01<1:49:28, 21.31it/s]global step 860000, trans_decision ep_re 167.115727543908

{"global_step": 860000, "eval_re": [167.115727543908, 167.115727543908, 
167.115727543908, 167.115727543908, 167.115727543908, 167.115727543908, 
167.115727543908, 167.115727543908, 167.115727543908, 167.115727543908], 
"eval_len": [101, 101, 101, 101, 101, 101, 101, 101, 101, 101]}

 87%|████████▋ | 869998/1000000 [15:34:41<1:41:59, 21.24it/s]global step 870000, trans_decision ep_re 740.7850945443362

{"global_step": 870000, "eval_re": [740.7850945443362, 740.7850945443362, 
740.7850945443362, 740.7850945443362, 740.7850945443362, 740.7850945443362, 
740.7850945443362, 740.7850945443362, 740.7850945443362, 740.7850945443362], 
"eval_len": [249, 249, 249, 249, 249, 249, 249, 249, 249, 249]}

 88%|████████▊ | 879998/1000000 [15:45:31<1:34:08, 21.24it/s]global step 880000, trans_decision ep_re 474.3429852433457

{"global_step": 880000, "eval_re": [474.34298524334577, 474.34298524334577, 
474.34298524334577, 474.34298524334577, 474.34298524334577, 474.34298524334577, 
474.34298524334577, 474.34298524334577, 474.34298524334577, 474.34298524334577],
"eval_len": [186, 186, 186, 186, 186, 186, 186, 186, 186, 186]}

 89%|████████▉ | 889999/1000000 [15:56:21<1:26:26, 21.21it/s]global step 890000, trans_decision ep_re 516.7514852744882

{"global_step": 890000, "eval_re": [516.7514852744882, 516.7514852744882, 
516.7514852744882, 516.7514852744882, 516.7514852744882, 516.7514852744882, 
516.7514852744882, 516.7514852744882, 516.7514852744882, 516.7514852744882], 
"eval_len": [230, 230, 230, 230, 230, 230, 230, 230, 230, 230]}

 90%|████████▉ | 899999/1000000 [16:07:01<1:19:59, 20.83it/s]global step 900000, trans_decision ep_re 912.0502946504237

{"global_step": 900000, "eval_re": [912.0502946504237, 912.0502946504237, 
912.0502946504237, 912.0502946504237, 912.0502946504237, 912.0502946504237, 
912.0502946504237, 912.0502946504237, 912.0502946504237, 912.0502946504237], 
"eval_len": [275, 275, 275, 275, 275, 275, 275, 275, 275, 275]}

 91%|█████████ | 909998/1000000 [16:18:01<1:11:48, 20.89it/s]global step 910000, trans_decision ep_re 1090.2881053533245

{"global_step": 910000, "eval_re": [1090.2881053533247, 1090.2881053533247, 
1090.2881053533247, 1090.2881053533247, 1090.2881053533247, 1090.2881053533247, 
1090.2881053533247, 1090.2881053533247, 1090.2881053533247, 1090.2881053533247],
"eval_len": [322, 322, 322, 322, 322, 322, 322, 322, 322, 322]}

 92%|█████████▏| 919999/1000000 [16:29:01<1:04:13, 20.76it/s]global step 920000, trans_decision ep_re 659.1864474006971

{"global_step": 920000, "eval_re": [659.1864474006971, 659.1864474006971, 
659.1864474006971, 659.1864474006971, 659.1864474006971, 659.1864474006971, 
659.1864474006971, 659.1864474006971, 659.1864474006971, 659.1864474006971], 
"eval_len": [233, 233, 233, 233, 233, 233, 233, 233, 233, 233]}

 93%|█████████▎| 929999/1000000 [16:39:51<55:31, 21.01it/s]global step 930000, trans_decision ep_re 980.2969554377263

{"global_step": 930000, "eval_re": [980.2969554377263, 980.2969554377263, 
980.2969554377263, 980.2969554377263, 980.2969554377263, 980.2969554377263, 
980.2969554377263, 980.2969554377263, 980.2969554377263, 980.2969554377263], 
"eval_len": [310, 310, 310, 310, 310, 310, 310, 310, 310, 310]}

 94%|█████████▍| 939999/1000000 [16:51:01<48:24, 20.66it/s]global step 940000, trans_decision ep_re 361.15167548635645

{"global_step": 940000, "eval_re": [361.15167548635645, 361.15167548635645, 
361.15167548635645, 361.15167548635645, 361.15167548635645, 361.15167548635645, 
361.15167548635645, 361.15167548635645, 361.15167548635645, 361.15167548635645],
"eval_len": [185, 185, 185, 185, 185, 185, 185, 185, 185, 185]}

 95%|█████████▍| 949998/1000000 [17:01:51<39:16, 21.21it/s]global step 950000, trans_decision ep_re 994.3894803837954

{"global_step": 950000, "eval_re": [994.3894803837954, 994.3894803837954, 
994.3894803837954, 994.3894803837954, 994.3894803837954, 994.3894803837954, 
994.3894803837954, 994.3894803837954, 994.3894803837954, 994.3894803837954], 
"eval_len": [297, 297, 297, 297, 297, 297, 297, 297, 297, 297]}

 96%|█████████▌| 959999/1000000 [17:12:41<31:45, 20.99it/s]global step 960000, trans_decision ep_re 323.92195100085297

{"global_step": 960000, "eval_re": [323.92195100085297, 323.92195100085297, 
323.92195100085297, 323.92195100085297, 323.92195100085297, 323.92195100085297, 
323.92195100085297, 323.92195100085297, 323.92195100085297, 323.92195100085297],
"eval_len": [166, 166, 166, 166, 166, 166, 166, 166, 166, 166]}

 97%|█████████▋| 969998/1000000 [17:23:31<23:21, 21.41it/s]global step 970000, trans_decision ep_re 509.2647691567533

{"global_step": 970000, "eval_re": [509.26476915675323, 509.26476915675323, 
509.26476915675323, 509.26476915675323, 509.26476915675323, 509.26476915675323, 
509.26476915675323, 509.26476915675323, 509.26476915675323, 509.26476915675323],
"eval_len": [185, 185, 185, 185, 185, 185, 185, 185, 185, 185]}

 98%|█████████▊| 979998/1000000 [17:34:11<15:43, 21.21it/s]global step 980000, trans_decision ep_re 546.5729540009377

{"global_step": 980000, "eval_re": [546.5729540009377, 546.5729540009377, 
546.5729540009377, 546.5729540009377, 546.5729540009377, 546.5729540009377, 
546.5729540009377, 546.5729540009377, 546.5729540009377, 546.5729540009377], 
"eval_len": [221, 221, 221, 221, 221, 221, 221, 221, 221, 221]}

 99%|█████████▉| 989999/1000000 [17:45:01<08:05, 20.60it/s]global step 990000, trans_decision ep_re 432.1094748475583

{"global_step": 990000, "eval_re": [432.1094748475583, 432.1094748475583, 
432.1094748475583, 432.1094748475583, 432.1094748475583, 432.1094748475583, 
432.1094748475583, 432.1094748475583, 432.1094748475583, 432.1094748475583], 
"eval_len": [166, 166, 166, 166, 166, 166, 166, 166, 166, 166]}

100%|█████████▉| 999999/1000000 [17:56:01<00:00, 20.82it/s]global step 1000000, trans_decision ep_re 523.8209986604342

{"global_step": 1000000, "eval_re": [523.8209986604342, 523.8209986604342, 
523.8209986604342, 523.8209986604342, 523.8209986604342, 523.8209986604342, 
523.8209986604342, 523.8209986604342, 523.8209986604342, 523.8209986604342], 
"eval_len": [199, 199, 199, 199, 199, 199, 199, 199, 199, 199]}

100%|██████████| 1000000/1000000 [17:56:08<00:00, 15.49it/s]
