
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.0
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9997/1000000 [04:40<10:53:24, 25.25it/s]global step 10000, trans_decision ep_re 427.4683154915342

{"global_step": 10000, "eval_re": [427.4683154915342, 427.4683154915342, 
427.4683154915342, 427.4683154915342, 427.4683154915342, 427.4683154915342, 
427.4683154915342, 427.4683154915342, 427.4683154915342, 427.4683154915342], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19999/1000000 [14:00<10:49:24, 25.15it/s]global step 20000, trans_decision ep_re 880.1588266400195

{"global_step": 20000, "eval_re": [880.1588266400195, 880.1588266400195, 
880.1588266400195, 880.1588266400195, 880.1588266400195, 880.1588266400195, 
880.1588266400195, 880.1588266400195, 880.1588266400195, 880.1588266400195], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29998/1000000 [23:10<10:37:53, 25.34it/s]global step 30000, trans_decision ep_re 291.1840415505375

{"global_step": 30000, "eval_re": [291.1840415505374, 291.1840415505374, 
291.1840415505374, 291.1840415505374, 291.1840415505374, 291.1840415505374, 
291.1840415505374, 291.1840415505374, 291.1840415505374, 291.1840415505374], 
"eval_len": [263, 263, 263, 263, 263, 263, 263, 263, 263, 263]}

  4%|▍         | 39999/1000000 [31:50<10:39:29, 25.02it/s]global step 40000, trans_decision ep_re 130.47519066812475

{"global_step": 40000, "eval_re": [130.47519066812478, 130.47519066812478, 
130.47519066812478, 130.47519066812478, 130.47519066812478, 130.47519066812478, 
130.47519066812478, 130.47519066812478, 130.47519066812478, 130.47519066812478],
"eval_len": [116, 116, 116, 116, 116, 116, 116, 116, 116, 116]}

  5%|▍         | 49997/1000000 [40:50<10:30:29, 25.11it/s]global step 50000, trans_decision ep_re 886.7292410607224

{"global_step": 50000, "eval_re": [886.7292410607224, 886.7292410607224, 
886.7292410607224, 886.7292410607224, 886.7292410607224, 886.7292410607224, 
886.7292410607224, 886.7292410607224, 886.7292410607224, 886.7292410607224], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59997/1000000 [50:10<10:21:02, 25.23it/s]global step 60000, trans_decision ep_re 850.0688929295068

{"global_step": 60000, "eval_re": [850.0688929295068, 850.0688929295068, 
850.0688929295068, 850.0688929295068, 850.0688929295068, 850.0688929295068, 
850.0688929295068, 850.0688929295068, 850.0688929295068, 850.0688929295068], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69998/1000000 [59:30<10:07:42, 25.51it/s]global step 70000, trans_decision ep_re 1138.0923528367916

{"global_step": 70000, "eval_re": [1138.0923528367916, 1138.0923528367916, 
1138.0923528367916, 1138.0923528367916, 1138.0923528367916, 1138.0923528367916, 
1138.0923528367916, 1138.0923528367916, 1138.0923528367916, 1138.0923528367916],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79998/1000000 [1:08:40<10:02:46, 25.44it/s]global step 80000, trans_decision ep_re 365.4630418263579

{"global_step": 80000, "eval_re": [365.4630418263579, 365.4630418263579, 
365.4630418263579, 365.4630418263579, 365.4630418263579, 365.4630418263579, 
365.4630418263579, 365.4630418263579, 365.4630418263579, 365.4630418263579], 
"eval_len": [189, 189, 189, 189, 189, 189, 189, 189, 189, 189]}

  9%|▉         | 89999/1000000 [1:17:30<10:01:09, 25.23it/s]global step 90000, trans_decision ep_re 453.0221936945716

{"global_step": 90000, "eval_re": [453.02219369457157, 453.02219369457157, 
453.02219369457157, 453.02219369457157, 453.02219369457157, 453.02219369457157, 
453.02219369457157, 453.02219369457157, 453.02219369457157, 453.02219369457157],
"eval_len": [264, 264, 264, 264, 264, 264, 264, 264, 264, 264]}

 10%|▉         | 99999/1000000 [1:26:20<9:59:44, 25.01it/s]global step 100000, trans_decision ep_re 90.68743708171397

{"global_step": 100000, "eval_re": [90.68743708171397, 90.68743708171397, 
90.68743708171397, 90.68743708171397, 90.68743708171397, 90.68743708171397, 
90.68743708171397, 90.68743708171397, 90.68743708171397, 90.68743708171397], 
"eval_len": [71, 71, 71, 71, 71, 71, 71, 71, 71, 71]}

 11%|█         | 109999/1000000 [1:35:10<9:46:26, 25.29it/s]global step 110000, trans_decision ep_re 2223.411539454189

{"global_step": 110000, "eval_re": [2223.4115394541896, 2223.4115394541896, 
2223.4115394541896, 2223.4115394541896, 2223.4115394541896, 2223.4115394541896, 
2223.4115394541896, 2223.4115394541896, 2223.4115394541896, 2223.4115394541896],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119999/1000000 [1:44:20<9:43:56, 25.12it/s]global step 120000, trans_decision ep_re 702.2425714620839

{"global_step": 120000, "eval_re": [702.2425714620839, 702.2425714620839, 
702.2425714620839, 702.2425714620839, 702.2425714620839, 702.2425714620839, 
702.2425714620839, 702.2425714620839, 702.2425714620839, 702.2425714620839], 
"eval_len": [340, 340, 340, 340, 340, 340, 340, 340, 340, 340]}

 13%|█▎        | 129999/1000000 [1:53:01<9:31:53, 25.35it/s]global step 130000, trans_decision ep_re 37.74803959311038

{"global_step": 130000, "eval_re": [37.74803959311038, 37.74803959311038, 
37.74803959311038, 37.74803959311038, 37.74803959311038, 37.74803959311038, 
37.74803959311038, 37.74803959311038, 37.74803959311038, 37.74803959311038], 
"eval_len": [76, 76, 76, 76, 76, 76, 76, 76, 76, 76]}

 14%|█▍        | 139997/1000000 [2:02:00<9:24:37, 25.39it/s]global step 140000, trans_decision ep_re 2198.875784799563

{"global_step": 140000, "eval_re": [2198.8757847995635, 2198.8757847995635, 
2198.8757847995635, 2198.8757847995635, 2198.8757847995635, 2198.8757847995635, 
2198.8757847995635, 2198.8757847995635, 2198.8757847995635, 2198.8757847995635],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149999/1000000 [2:11:10<9:20:37, 25.27it/s]global step 150000, trans_decision ep_re 118.67921566436674

{"global_step": 150000, "eval_re": [118.67921566436675, 118.67921566436675, 
118.67921566436675, 118.67921566436675, 118.67921566436675, 118.67921566436675, 
118.67921566436675, 118.67921566436675, 118.67921566436675, 118.67921566436675],
"eval_len": [74, 74, 74, 74, 74, 74, 74, 74, 74, 74]}

 16%|█▌        | 159997/1000000 [2:19:50<9:11:49, 25.37it/s]global step 160000, trans_decision ep_re 920.438448643495

{"global_step": 160000, "eval_re": [920.4384486434949, 920.4384486434949, 
920.4384486434949, 920.4384486434949, 920.4384486434949, 920.4384486434949, 
920.4384486434949, 920.4384486434949, 920.4384486434949, 920.4384486434949], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169999/1000000 [2:29:10<9:02:17, 25.51it/s]global step 170000, trans_decision ep_re 564.1002992385422

{"global_step": 170000, "eval_re": [564.1002992385423, 564.1002992385423, 
564.1002992385423, 564.1002992385423, 564.1002992385423, 564.1002992385423, 
564.1002992385423, 564.1002992385423, 564.1002992385423, 564.1002992385423], 
"eval_len": [250, 250, 250, 250, 250, 250, 250, 250, 250, 250]}

 18%|█▊        | 179998/1000000 [2:37:50<8:50:11, 25.78it/s]global step 180000, trans_decision ep_re 2391.839190394111

{"global_step": 180000, "eval_re": [2391.839190394111, 2391.839190394111, 
2391.839190394111, 2391.839190394111, 2391.839190394111, 2391.839190394111, 
2391.839190394111, 2391.839190394111, 2391.839190394111, 2391.839190394111], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189999/1000000 [2:47:00<8:49:55, 25.48it/s]global step 190000, trans_decision ep_re 2356.2839640015227

{"global_step": 190000, "eval_re": [2356.2839640015227, 2356.2839640015227, 
2356.2839640015227, 2356.2839640015227, 2356.2839640015227, 2356.2839640015227, 
2356.2839640015227, 2356.2839640015227, 2356.2839640015227, 2356.2839640015227],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199999/1000000 [2:56:20<8:41:26, 25.57it/s]global step 200000, trans_decision ep_re 2376.812500378407

{"global_step": 200000, "eval_re": [2376.8125003784066, 2376.8125003784066, 
2376.8125003784066, 2376.8125003784066, 2376.8125003784066, 2376.8125003784066, 
2376.8125003784066, 2376.8125003784066, 2376.8125003784066, 2376.8125003784066],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209998/1000000 [3:05:30<8:31:01, 25.77it/s]global step 210000, trans_decision ep_re 2008.0090408089673

{"global_step": 210000, "eval_re": [2008.0090408089673, 2008.0090408089673, 
2008.0090408089673, 2008.0090408089673, 2008.0090408089673, 2008.0090408089673, 
2008.0090408089673, 2008.0090408089673, 2008.0090408089673, 2008.0090408089673],
"eval_len": [720, 720, 720, 720, 720, 720, 720, 720, 720, 720]}

 22%|██▏       | 219997/1000000 [3:14:30<8:28:27, 25.57it/s]global step 220000, trans_decision ep_re 996.4352270884017

{"global_step": 220000, "eval_re": [996.4352270884018, 996.4352270884018, 
996.4352270884018, 996.4352270884018, 996.4352270884018, 996.4352270884018, 
996.4352270884018, 996.4352270884018, 996.4352270884018, 996.4352270884018], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229998/1000000 [3:23:40<8:23:39, 25.48it/s]global step 230000, trans_decision ep_re 908.3715555922369

{"global_step": 230000, "eval_re": [908.3715555922369, 908.3715555922369, 
908.3715555922369, 908.3715555922369, 908.3715555922369, 908.3715555922369, 
908.3715555922369, 908.3715555922369, 908.3715555922369, 908.3715555922369], 
"eval_len": [372, 372, 372, 372, 372, 372, 372, 372, 372, 372]}

 24%|██▍       | 239999/1000000 [3:32:30<8:18:30, 25.41it/s]global step 240000, trans_decision ep_re 2169.0069417003733

{"global_step": 240000, "eval_re": [2169.0069417003733, 2169.0069417003733, 
2169.0069417003733, 2169.0069417003733, 2169.0069417003733, 2169.0069417003733, 
2169.0069417003733, 2169.0069417003733, 2169.0069417003733, 2169.0069417003733],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249998/1000000 [3:41:40<8:01:26, 25.96it/s]global step 250000, trans_decision ep_re 2031.2715430866308

{"global_step": 250000, "eval_re": [2031.271543086631, 2031.271543086631, 
2031.271543086631, 2031.271543086631, 2031.271543086631, 2031.271543086631, 
2031.271543086631, 2031.271543086631, 2031.271543086631, 2031.271543086631], 
"eval_len": [708, 708, 708, 708, 708, 708, 708, 708, 708, 708]}

 26%|██▌       | 259998/1000000 [3:50:40<7:54:36, 25.99it/s]global step 260000, trans_decision ep_re 2938.689081946992

{"global_step": 260000, "eval_re": [2938.689081946992, 2938.689081946992, 
2938.689081946992, 2938.689081946992, 2938.689081946992, 2938.689081946992, 
2938.689081946992, 2938.689081946992, 2938.689081946992, 2938.689081946992], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269999/1000000 [3:59:40<7:57:23, 25.49it/s]global step 270000, trans_decision ep_re 2009.6468310763726

{"global_step": 270000, "eval_re": [2009.646831076372, 2009.646831076372, 
2009.646831076372, 2009.646831076372, 2009.646831076372, 2009.646831076372, 
2009.646831076372, 2009.646831076372, 2009.646831076372, 2009.646831076372], 
"eval_len": [764, 764, 764, 764, 764, 764, 764, 764, 764, 764]}

 28%|██▊       | 279999/1000000 [4:08:40<7:45:06, 25.80it/s]global step 280000, trans_decision ep_re 2777.5813676232433

{"global_step": 280000, "eval_re": [2777.5813676232433, 2777.5813676232433, 
2777.5813676232433, 2777.5813676232433, 2777.5813676232433, 2777.5813676232433, 
2777.5813676232433, 2777.5813676232433, 2777.5813676232433, 2777.5813676232433],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289999/1000000 [4:17:50<7:37:09, 25.88it/s]global step 290000, trans_decision ep_re 2104.952271746396

{"global_step": 290000, "eval_re": [2104.952271746396, 2104.952271746396, 
2104.952271746396, 2104.952271746396, 2104.952271746396, 2104.952271746396, 
2104.952271746396, 2104.952271746396, 2104.952271746396, 2104.952271746396], 
"eval_len": [706, 706, 706, 706, 706, 706, 706, 706, 706, 706]}

 30%|██▉       | 299999/1000000 [4:26:50<7:31:53, 25.82it/s]global step 300000, trans_decision ep_re 2966.825108135127

{"global_step": 300000, "eval_re": [2966.825108135127, 2966.825108135127, 
2966.825108135127, 2966.825108135127, 2966.825108135127, 2966.825108135127, 
2966.825108135127, 2966.825108135127, 2966.825108135127, 2966.825108135127], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309997/1000000 [4:35:50<7:25:17, 25.83it/s]global step 310000, trans_decision ep_re 2965.927903889654

{"global_step": 310000, "eval_re": [2965.9279038896543, 2965.9279038896543, 
2965.9279038896543, 2965.9279038896543, 2965.9279038896543, 2965.9279038896543, 
2965.9279038896543, 2965.9279038896543, 2965.9279038896543, 2965.9279038896543],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319998/1000000 [4:45:00<7:19:04, 25.81it/s]global step 320000, trans_decision ep_re 2947.3029409862947

{"global_step": 320000, "eval_re": [2947.302940986295, 2947.302940986295, 
2947.302940986295, 2947.302940986295, 2947.302940986295, 2947.302940986295, 
2947.302940986295, 2947.302940986295, 2947.302940986295, 2947.302940986295], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329999/1000000 [4:54:00<7:12:27, 25.82it/s]global step 330000, trans_decision ep_re 1234.9721565727552

{"global_step": 330000, "eval_re": [1234.972156572755, 1234.972156572755, 
1234.972156572755, 1234.972156572755, 1234.972156572755, 1234.972156572755, 
1234.972156572755, 1234.972156572755, 1234.972156572755, 1234.972156572755], 
"eval_len": [439, 439, 439, 439, 439, 439, 439, 439, 439, 439]}

 34%|███▍      | 339999/1000000 [5:02:50<7:06:36, 25.78it/s]global step 340000, trans_decision ep_re 1114.9151392110691

{"global_step": 340000, "eval_re": [1114.9151392110691, 1114.9151392110691, 
1114.9151392110691, 1114.9151392110691, 1114.9151392110691, 1114.9151392110691, 
1114.9151392110691, 1114.9151392110691, 1114.9151392110691, 1114.9151392110691],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349999/1000000 [5:12:00<6:59:05, 25.85it/s]global step 350000, trans_decision ep_re 1957.4237607405266

{"global_step": 350000, "eval_re": [1957.4237607405266, 1957.4237607405266, 
1957.4237607405266, 1957.4237607405266, 1957.4237607405266, 1957.4237607405266, 
1957.4237607405266, 1957.4237607405266, 1957.4237607405266, 1957.4237607405266],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359997/1000000 [5:20:46<6:50:39, 25.97it/s]global step 360000, trans_decision ep_re 50.86419848610143

{"global_step": 360000, "eval_re": [50.86419848610143, 50.86419848610143, 
50.86419848610143, 50.86419848610143, 50.86419848610143, 50.86419848610143, 
50.86419848610143, 50.86419848610143, 50.86419848610143, 50.86419848610143], 
"eval_len": [40, 40, 40, 40, 40, 40, 40, 40, 40, 40]}

 37%|███▋      | 369998/1000000 [5:29:40<6:40:24, 26.22it/s]global step 370000, trans_decision ep_re -1283.572856331301

{"global_step": 370000, "eval_re": [-1283.572856331301, -1283.572856331301, 
-1283.572856331301, -1283.572856331301, -1283.572856331301, -1283.572856331301, 
-1283.572856331301, -1283.572856331301, -1283.572856331301, -1283.572856331301],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379999/1000000 [5:38:40<6:37:45, 25.98it/s]global step 380000, trans_decision ep_re 2842.7476881682933

{"global_step": 380000, "eval_re": [2842.7476881682937, 2842.7476881682937, 
2842.7476881682937, 2842.7476881682937, 2842.7476881682937, 2842.7476881682937, 
2842.7476881682937, 2842.7476881682937, 2842.7476881682937, 2842.7476881682937],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389997/1000000 [5:47:50<6:33:16, 25.85it/s]global step 390000, trans_decision ep_re 3320.4621515204244

{"global_step": 390000, "eval_re": [3320.4621515204244, 3320.4621515204244, 
3320.4621515204244, 3320.4621515204244, 3320.4621515204244, 3320.4621515204244, 
3320.4621515204244, 3320.4621515204244, 3320.4621515204244, 3320.4621515204244],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [5:56:50<6:27:49, 25.78it/s]global step 400000, trans_decision ep_re 3229.4226775718407

{"global_step": 400000, "eval_re": [3229.4226775718403, 3229.4226775718403, 
3229.4226775718403, 3229.4226775718403, 3229.4226775718403, 3229.4226775718403, 
3229.4226775718403, 3229.4226775718403, 3229.4226775718403, 3229.4226775718403],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409998/1000000 [6:06:00<6:16:20, 26.13it/s]global step 410000, trans_decision ep_re 573.7232611900263

{"global_step": 410000, "eval_re": [573.7232611900263, 573.7232611900263, 
573.7232611900263, 573.7232611900263, 573.7232611900263, 573.7232611900263, 
573.7232611900263, 573.7232611900263, 573.7232611900263, 573.7232611900263], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419998/1000000 [6:15:10<6:07:48, 26.28it/s]global step 420000, trans_decision ep_re 2125.816483935596

{"global_step": 420000, "eval_re": [2125.816483935596, 2125.816483935596, 
2125.816483935596, 2125.816483935596, 2125.816483935596, 2125.816483935596, 
2125.816483935596, 2125.816483935596, 2125.816483935596, 2125.816483935596], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429999/1000000 [6:24:10<6:07:52, 25.82it/s]global step 430000, trans_decision ep_re 3394.6981250252147

{"global_step": 430000, "eval_re": [3394.6981250252147, 3394.6981250252147, 
3394.6981250252147, 3394.6981250252147, 3394.6981250252147, 3394.6981250252147, 
3394.6981250252147, 3394.6981250252147, 3394.6981250252147, 3394.6981250252147],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439999/1000000 [6:33:20<5:58:55, 26.00it/s]global step 440000, trans_decision ep_re 2028.0200375610282

{"global_step": 440000, "eval_re": [2028.0200375610282, 2028.0200375610282, 
2028.0200375610282, 2028.0200375610282, 2028.0200375610282, 2028.0200375610282, 
2028.0200375610282, 2028.0200375610282, 2028.0200375610282, 2028.0200375610282],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449999/1000000 [6:42:20<5:51:31, 26.08it/s]global step 450000, trans_decision ep_re 79.23027831690753

{"global_step": 450000, "eval_re": [79.23027831690753, 79.23027831690753, 
79.23027831690753, 79.23027831690753, 79.23027831690753, 79.23027831690753, 
79.23027831690753, 79.23027831690753, 79.23027831690753, 79.23027831690753], 
"eval_len": [68, 68, 68, 68, 68, 68, 68, 68, 68, 68]}

 46%|████▌     | 459998/1000000 [6:51:00<5:48:11, 25.85it/s]global step 460000, trans_decision ep_re 2906.3506882489005

{"global_step": 460000, "eval_re": [2906.3506882489, 2906.3506882489, 
2906.3506882489, 2906.3506882489, 2906.3506882489, 2906.3506882489, 
2906.3506882489, 2906.3506882489, 2906.3506882489, 2906.3506882489], "eval_len":
[1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469998/1000000 [7:00:00<5:40:19, 25.96it/s]global step 470000, trans_decision ep_re 3376.2730438476765

{"global_step": 470000, "eval_re": [3376.273043847676, 3376.273043847676, 
3376.273043847676, 3376.273043847676, 3376.273043847676, 3376.273043847676, 
3376.273043847676, 3376.273043847676, 3376.273043847676, 3376.273043847676], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479999/1000000 [7:09:10<5:39:01, 25.56it/s]global step 480000, trans_decision ep_re 476.9048639054098

{"global_step": 480000, "eval_re": [476.9048639054098, 476.9048639054098, 
476.9048639054098, 476.9048639054098, 476.9048639054098, 476.9048639054098, 
476.9048639054098, 476.9048639054098, 476.9048639054098, 476.9048639054098], 
"eval_len": [301, 301, 301, 301, 301, 301, 301, 301, 301, 301]}

 49%|████▉     | 489997/1000000 [7:17:50<5:31:41, 25.63it/s]global step 490000, trans_decision ep_re 617.9741532785612

{"global_step": 490000, "eval_re": [617.9741532785612, 617.9741532785612, 
617.9741532785612, 617.9741532785612, 617.9741532785612, 617.9741532785612, 
617.9741532785612, 617.9741532785612, 617.9741532785612, 617.9741532785612], 
"eval_len": [315, 315, 315, 315, 315, 315, 315, 315, 315, 315]}

 50%|████▉     | 499999/1000000 [7:26:40<5:23:57, 25.72it/s]global step 500000, trans_decision ep_re 1955.4420688799298

{"global_step": 500000, "eval_re": [1955.44206887993, 1955.44206887993, 
1955.44206887993, 1955.44206887993, 1955.44206887993, 1955.44206887993, 
1955.44206887993, 1955.44206887993, 1955.44206887993, 1955.44206887993], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509997/1000000 [7:35:40<5:11:49, 26.19it/s]global step 510000, trans_decision ep_re 2328.051908404493

{"global_step": 510000, "eval_re": [2328.051908404493, 2328.051908404493, 
2328.051908404493, 2328.051908404493, 2328.051908404493, 2328.051908404493, 
2328.051908404493, 2328.051908404493, 2328.051908404493, 2328.051908404493], 
"eval_len": [711, 711, 711, 711, 711, 711, 711, 711, 711, 711]}

 52%|█████▏    | 519998/1000000 [7:44:40<5:08:43, 25.91it/s]global step 520000, trans_decision ep_re 3325.6671038794607

{"global_step": 520000, "eval_re": [3325.667103879461, 3325.667103879461, 
3325.667103879461, 3325.667103879461, 3325.667103879461, 3325.667103879461, 
3325.667103879461, 3325.667103879461, 3325.667103879461, 3325.667103879461], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529999/1000000 [7:53:40<5:04:02, 25.76it/s]global step 530000, trans_decision ep_re 2092.036577264943

{"global_step": 530000, "eval_re": [2092.0365772649434, 2092.0365772649434, 
2092.0365772649434, 2092.0365772649434, 2092.0365772649434, 2092.0365772649434, 
2092.0365772649434, 2092.0365772649434, 2092.0365772649434, 2092.0365772649434],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539997/1000000 [8:02:50<4:58:18, 25.70it/s]global step 540000, trans_decision ep_re 3032.853034591125

{"global_step": 540000, "eval_re": [3032.8530345911254, 3032.8530345911254, 
3032.8530345911254, 3032.8530345911254, 3032.8530345911254, 3032.8530345911254, 
3032.8530345911254, 3032.8530345911254, 3032.8530345911254, 3032.8530345911254],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549999/1000000 [8:11:50<4:50:38, 25.81it/s]global step 550000, trans_decision ep_re 2149.441481301167

{"global_step": 550000, "eval_re": [2149.441481301167, 2149.441481301167, 
2149.441481301167, 2149.441481301167, 2149.441481301167, 2149.441481301167, 
2149.441481301167, 2149.441481301167, 2149.441481301167, 2149.441481301167], 
"eval_len": [729, 729, 729, 729, 729, 729, 729, 729, 729, 729]}

 56%|█████▌    | 559998/1000000 [8:20:50<4:38:22, 26.34it/s]global step 560000, trans_decision ep_re 2984.353037644894

{"global_step": 560000, "eval_re": [2984.353037644894, 2984.353037644894, 
2984.353037644894, 2984.353037644894, 2984.353037644894, 2984.353037644894, 
2984.353037644894, 2984.353037644894, 2984.353037644894, 2984.353037644894], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569998/1000000 [8:30:00<4:36:09, 25.95it/s]global step 570000, trans_decision ep_re 1277.053878008963

{"global_step": 570000, "eval_re": [1277.053878008963, 1277.053878008963, 
1277.053878008963, 1277.053878008963, 1277.053878008963, 1277.053878008963, 
1277.053878008963, 1277.053878008963, 1277.053878008963, 1277.053878008963], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579999/1000000 [8:39:00<4:32:19, 25.70it/s]global step 580000, trans_decision ep_re 3220.1543085219228

{"global_step": 580000, "eval_re": [3220.1543085219228, 3220.1543085219228, 
3220.1543085219228, 3220.1543085219228, 3220.1543085219228, 3220.1543085219228, 
3220.1543085219228, 3220.1543085219228, 3220.1543085219228, 3220.1543085219228],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589999/1000000 [8:48:10<4:24:15, 25.86it/s]global step 590000, trans_decision ep_re 3074.873530592286

{"global_step": 590000, "eval_re": [3074.873530592286, 3074.873530592286, 
3074.873530592286, 3074.873530592286, 3074.873530592286, 3074.873530592286, 
3074.873530592286, 3074.873530592286, 3074.873530592286, 3074.873530592286], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599999/1000000 [8:57:10<4:17:35, 25.88it/s]global step 600000, trans_decision ep_re 2365.9078913593266

{"global_step": 600000, "eval_re": [2365.9078913593266, 2365.9078913593266, 
2365.9078913593266, 2365.9078913593266, 2365.9078913593266, 2365.9078913593266, 
2365.9078913593266, 2365.9078913593266, 2365.9078913593266, 2365.9078913593266],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609998/1000000 [9:06:20<4:08:41, 26.14it/s]global step 610000, trans_decision ep_re 2915.243976892713

{"global_step": 610000, "eval_re": [2915.2439768927125, 2915.2439768927125, 
2915.2439768927125, 2915.2439768927125, 2915.2439768927125, 2915.2439768927125, 
2915.2439768927125, 2915.2439768927125, 2915.2439768927125, 2915.2439768927125],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619999/1000000 [9:15:30<4:05:03, 25.84it/s]global step 620000, trans_decision ep_re 2902.146030248008

{"global_step": 620000, "eval_re": [2902.146030248008, 2902.146030248008, 
2902.146030248008, 2902.146030248008, 2902.146030248008, 2902.146030248008, 
2902.146030248008, 2902.146030248008, 2902.146030248008, 2902.146030248008], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629998/1000000 [9:24:30<3:58:07, 25.90it/s]global step 630000, trans_decision ep_re 2581.919198263764

{"global_step": 630000, "eval_re": [2581.919198263764, 2581.919198263764, 
2581.919198263764, 2581.919198263764, 2581.919198263764, 2581.919198263764, 
2581.919198263764, 2581.919198263764, 2581.919198263764, 2581.919198263764], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639998/1000000 [9:33:40<3:51:33, 25.91it/s]global step 640000, trans_decision ep_re 2694.996108029015

{"global_step": 640000, "eval_re": [2694.996108029015, 2694.996108029015, 
2694.996108029015, 2694.996108029015, 2694.996108029015, 2694.996108029015, 
2694.996108029015, 2694.996108029015, 2694.996108029015, 2694.996108029015], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649999/1000000 [9:42:40<3:46:09, 25.79it/s]global step 650000, trans_decision ep_re 3152.8757268835916

{"global_step": 650000, "eval_re": [3152.875726883592, 3152.875726883592, 
3152.875726883592, 3152.875726883592, 3152.875726883592, 3152.875726883592, 
3152.875726883592, 3152.875726883592, 3152.875726883592, 3152.875726883592], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659997/1000000 [9:51:50<3:39:46, 25.78it/s]global step 660000, trans_decision ep_re 1882.9857362199605

{"global_step": 660000, "eval_re": [1882.98573621996, 1882.98573621996, 
1882.98573621996, 1882.98573621996, 1882.98573621996, 1882.98573621996, 
1882.98573621996, 1882.98573621996, 1882.98573621996, 1882.98573621996], 
"eval_len": [663, 663, 663, 663, 663, 663, 663, 663, 663, 663]}

 67%|██████▋   | 669997/1000000 [10:00:40<3:34:27, 25.65it/s]global step 670000, trans_decision ep_re 29.922396871778894

{"global_step": 670000, "eval_re": [29.922396871778886, 29.922396871778886, 
29.922396871778886, 29.922396871778886, 29.922396871778886, 29.922396871778886, 
29.922396871778886, 29.922396871778886, 29.922396871778886, 29.922396871778886],
"eval_len": [33, 33, 33, 33, 33, 33, 33, 33, 33, 33]}

 68%|██████▊   | 679999/1000000 [10:09:10<3:27:46, 25.67it/s]global step 680000, trans_decision ep_re 3279.275974516325

{"global_step": 680000, "eval_re": [3279.2759745163244, 3279.2759745163244, 
3279.2759745163244, 3279.2759745163244, 3279.2759745163244, 3279.2759745163244, 
3279.2759745163244, 3279.2759745163244, 3279.2759745163244, 3279.2759745163244],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689998/1000000 [10:18:20<3:16:28, 26.30it/s]global step 690000, trans_decision ep_re 1145.2082896662944

{"global_step": 690000, "eval_re": [1145.2082896662944, 1145.2082896662944, 
1145.2082896662944, 1145.2082896662944, 1145.2082896662944, 1145.2082896662944, 
1145.2082896662944, 1145.2082896662944, 1145.2082896662944, 1145.2082896662944],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699999/1000000 [10:27:30<3:13:06, 25.89it/s]global step 700000, trans_decision ep_re 2986.121620415354

{"global_step": 700000, "eval_re": [2986.121620415354, 2986.121620415354, 
2986.121620415354, 2986.121620415354, 2986.121620415354, 2986.121620415354, 
2986.121620415354, 2986.121620415354, 2986.121620415354, 2986.121620415354], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709997/1000000 [10:36:30<3:06:37, 25.90it/s]global step 710000, trans_decision ep_re 3327.368767472725

{"global_step": 710000, "eval_re": [3327.368767472725, 3327.368767472725, 
3327.368767472725, 3327.368767472725, 3327.368767472725, 3327.368767472725, 
3327.368767472725, 3327.368767472725, 3327.368767472725, 3327.368767472725], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719998/1000000 [10:45:40<3:00:33, 25.85it/s]global step 720000, trans_decision ep_re 1324.1793800353846

{"global_step": 720000, "eval_re": [1324.1793800353846, 1324.1793800353846, 
1324.1793800353846, 1324.1793800353846, 1324.1793800353846, 1324.1793800353846, 
1324.1793800353846, 1324.1793800353846, 1324.1793800353846, 1324.1793800353846],
"eval_len": [447, 447, 447, 447, 447, 447, 447, 447, 447, 447]}

 73%|███████▎  | 729999/1000000 [10:54:20<2:53:19, 25.96it/s]global step 730000, trans_decision ep_re 566.6811626533644

{"global_step": 730000, "eval_re": [566.6811626533644, 566.6811626533644, 
566.6811626533644, 566.6811626533644, 566.6811626533644, 566.6811626533644, 
566.6811626533644, 566.6811626533644, 566.6811626533644, 566.6811626533644], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739998/1000000 [11:03:30<2:45:16, 26.22it/s]global step 740000, trans_decision ep_re 272.8501894088757

{"global_step": 740000, "eval_re": [272.8501894088756, 272.8501894088756, 
272.8501894088756, 272.8501894088756, 272.8501894088756, 272.8501894088756, 
272.8501894088756, 272.8501894088756, 272.8501894088756, 272.8501894088756], 
"eval_len": [179, 179, 179, 179, 179, 179, 179, 179, 179, 179]}

 75%|███████▍  | 749997/1000000 [11:11:53<2:42:23, 25.66it/s]global step 750000, trans_decision ep_re 51.288031545574185

{"global_step": 750000, "eval_re": [51.288031545574185, 51.288031545574185, 
51.288031545574185, 51.288031545574185, 51.288031545574185, 51.288031545574185, 
51.288031545574185, 51.288031545574185, 51.288031545574185, 51.288031545574185],
"eval_len": [42, 42, 42, 42, 42, 42, 42, 42, 42, 42]}

 76%|███████▌  | 759997/1000000 [11:20:40<2:35:51, 25.67it/s]global step 760000, trans_decision ep_re 3323.5435610139043

{"global_step": 760000, "eval_re": [3323.5435610139048, 3323.5435610139048, 
3323.5435610139048, 3323.5435610139048, 3323.5435610139048, 3323.5435610139048, 
3323.5435610139048, 3323.5435610139048, 3323.5435610139048, 3323.5435610139048],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769997/1000000 [11:29:50<2:28:43, 25.78it/s]global step 770000, trans_decision ep_re 2858.599799863984

{"global_step": 770000, "eval_re": [2858.5997998639846, 2858.5997998639846, 
2858.5997998639846, 2858.5997998639846, 2858.5997998639846, 2858.5997998639846, 
2858.5997998639846, 2858.5997998639846, 2858.5997998639846, 2858.5997998639846],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779997/1000000 [11:38:50<2:21:13, 25.96it/s]global step 780000, trans_decision ep_re 1503.2129929839589

{"global_step": 780000, "eval_re": [1503.2129929839587, 1503.2129929839587, 
1503.2129929839587, 1503.2129929839587, 1503.2129929839587, 1503.2129929839587, 
1503.2129929839587, 1503.2129929839587, 1503.2129929839587, 1503.2129929839587],
"eval_len": [515, 515, 515, 515, 515, 515, 515, 515, 515, 515]}

 79%|███████▉  | 789999/1000000 [11:47:40<2:16:11, 25.70it/s]global step 790000, trans_decision ep_re 2964.6278056164338

{"global_step": 790000, "eval_re": [2964.6278056164338, 2964.6278056164338, 
2964.6278056164338, 2964.6278056164338, 2964.6278056164338, 2964.6278056164338, 
2964.6278056164338, 2964.6278056164338, 2964.6278056164338, 2964.6278056164338],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799999/1000000 [11:56:50<2:09:23, 25.76it/s]global step 800000, trans_decision ep_re 3187.98231974087

{"global_step": 800000, "eval_re": [3187.98231974087, 3187.98231974087, 
3187.98231974087, 3187.98231974087, 3187.98231974087, 3187.98231974087, 
3187.98231974087, 3187.98231974087, 3187.98231974087, 3187.98231974087], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809999/1000000 [12:06:00<2:02:17, 25.89it/s]global step 810000, trans_decision ep_re 2723.9528818762224

{"global_step": 810000, "eval_re": [2723.9528818762224, 2723.9528818762224, 
2723.9528818762224, 2723.9528818762224, 2723.9528818762224, 2723.9528818762224, 
2723.9528818762224, 2723.9528818762224, 2723.9528818762224, 2723.9528818762224],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819997/1000000 [12:15:00<1:55:44, 25.92it/s]global step 820000, trans_decision ep_re 889.3975186849686

{"global_step": 820000, "eval_re": [889.3975186849686, 889.3975186849686, 
889.3975186849686, 889.3975186849686, 889.3975186849686, 889.3975186849686, 
889.3975186849686, 889.3975186849686, 889.3975186849686, 889.3975186849686], 
"eval_len": [345, 345, 345, 345, 345, 345, 345, 345, 345, 345]}

 83%|████████▎ | 829999/1000000 [12:23:40<1:50:35, 25.62it/s]global step 830000, trans_decision ep_re 48.89310481106952

{"global_step": 830000, "eval_re": [48.89310481106952, 48.89310481106952, 
48.89310481106952, 48.89310481106952, 48.89310481106952, 48.89310481106952, 
48.89310481106952, 48.89310481106952, 48.89310481106952, 48.89310481106952], 
"eval_len": [42, 42, 42, 42, 42, 42, 42, 42, 42, 42]}

 84%|████████▍ | 839997/1000000 [12:32:20<1:43:55, 25.66it/s]global step 840000, trans_decision ep_re 2192.2017739726684

{"global_step": 840000, "eval_re": [2192.2017739726684, 2192.2017739726684, 
2192.2017739726684, 2192.2017739726684, 2192.2017739726684, 2192.2017739726684, 
2192.2017739726684, 2192.2017739726684, 2192.2017739726684, 2192.2017739726684],
"eval_len": [836, 836, 836, 836, 836, 836, 836, 836, 836, 836]}

 85%|████████▍ | 849998/1000000 [12:41:20<1:35:29, 26.18it/s]global step 850000, trans_decision ep_re 3059.139240959943

{"global_step": 850000, "eval_re": [3059.139240959943, 3059.139240959943, 
3059.139240959943, 3059.139240959943, 3059.139240959943, 3059.139240959943, 
3059.139240959943, 3059.139240959943, 3059.139240959943, 3059.139240959943], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859999/1000000 [12:50:20<1:31:08, 25.60it/s]global step 860000, trans_decision ep_re 934.5969004570728

{"global_step": 860000, "eval_re": [934.5969004570728, 934.5969004570728, 
934.5969004570728, 934.5969004570728, 934.5969004570728, 934.5969004570728, 
934.5969004570728, 934.5969004570728, 934.5969004570728, 934.5969004570728], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869999/1000000 [12:59:30<1:24:21, 25.68it/s]global step 870000, trans_decision ep_re 2375.2330141034136

{"global_step": 870000, "eval_re": [2375.233014103413, 2375.233014103413, 
2375.233014103413, 2375.233014103413, 2375.233014103413, 2375.233014103413, 
2375.233014103413, 2375.233014103413, 2375.233014103413, 2375.233014103413], 
"eval_len": [809, 809, 809, 809, 809, 809, 809, 809, 809, 809]}

 88%|████████▊ | 879997/1000000 [13:08:30<1:17:23, 25.84it/s]global step 880000, trans_decision ep_re 3492.270490958458

{"global_step": 880000, "eval_re": [3492.2704909584577, 3492.2704909584577, 
3492.2704909584577, 3492.2704909584577, 3492.2704909584577, 3492.2704909584577, 
3492.2704909584577, 3492.2704909584577, 3492.2704909584577, 3492.2704909584577],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889999/1000000 [13:17:30<1:11:02, 25.80it/s]global step 890000, trans_decision ep_re 2973.7374734906484

{"global_step": 890000, "eval_re": [2973.7374734906484, 2973.7374734906484, 
2973.7374734906484, 2973.7374734906484, 2973.7374734906484, 2973.7374734906484, 
2973.7374734906484, 2973.7374734906484, 2973.7374734906484, 2973.7374734906484],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [13:26:40<1:04:57, 25.66it/s]global step 900000, trans_decision ep_re 3229.6431823417947

{"global_step": 900000, "eval_re": [3229.6431823417947, 3229.6431823417947, 
3229.6431823417947, 3229.6431823417947, 3229.6431823417947, 3229.6431823417947, 
3229.6431823417947, 3229.6431823417947, 3229.6431823417947, 3229.6431823417947],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909999/1000000 [13:35:40<58:34, 25.61it/s]global step 910000, trans_decision ep_re 2610.7791634071973

{"global_step": 910000, "eval_re": [2610.779163407197, 2610.779163407197, 
2610.779163407197, 2610.779163407197, 2610.779163407197, 2610.779163407197, 
2610.779163407197, 2610.779163407197, 2610.779163407197, 2610.779163407197], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919998/1000000 [13:44:50<51:31, 25.88it/s]global step 920000, trans_decision ep_re 2927.611396833094

{"global_step": 920000, "eval_re": [2927.611396833094, 2927.611396833094, 
2927.611396833094, 2927.611396833094, 2927.611396833094, 2927.611396833094, 
2927.611396833094, 2927.611396833094, 2927.611396833094, 2927.611396833094], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929999/1000000 [13:54:00<45:10, 25.83it/s]global step 930000, trans_decision ep_re 2939.534734135096

{"global_step": 930000, "eval_re": [2939.534734135096, 2939.534734135096, 
2939.534734135096, 2939.534734135096, 2939.534734135096, 2939.534734135096, 
2939.534734135096, 2939.534734135096, 2939.534734135096, 2939.534734135096], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939997/1000000 [14:03:00<38:21, 26.07it/s]global step 940000, trans_decision ep_re 2690.686402145863

{"global_step": 940000, "eval_re": [2690.6864021458628, 2690.6864021458628, 
2690.6864021458628, 2690.6864021458628, 2690.6864021458628, 2690.6864021458628, 
2690.6864021458628, 2690.6864021458628, 2690.6864021458628, 2690.6864021458628],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949999/1000000 [14:12:10<32:19, 25.78it/s]global step 950000, trans_decision ep_re 2846.7498028086675

{"global_step": 950000, "eval_re": [2846.7498028086675, 2846.7498028086675, 
2846.7498028086675, 2846.7498028086675, 2846.7498028086675, 2846.7498028086675, 
2846.7498028086675, 2846.7498028086675, 2846.7498028086675, 2846.7498028086675],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959999/1000000 [14:21:10<25:35, 26.05it/s]global step 960000, trans_decision ep_re 115.70525542046543

{"global_step": 960000, "eval_re": [115.70525542046543, 115.70525542046543, 
115.70525542046543, 115.70525542046543, 115.70525542046543, 115.70525542046543, 
115.70525542046543, 115.70525542046543, 115.70525542046543, 115.70525542046543],
"eval_len": [85, 85, 85, 85, 85, 85, 85, 85, 85, 85]}

 97%|█████████▋| 969998/1000000 [14:29:50<19:14, 26.00it/s]global step 970000, trans_decision ep_re 2883.091342676817

{"global_step": 970000, "eval_re": [2883.091342676817, 2883.091342676817, 
2883.091342676817, 2883.091342676817, 2883.091342676817, 2883.091342676817, 
2883.091342676817, 2883.091342676817, 2883.091342676817, 2883.091342676817], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979997/1000000 [14:38:50<12:59, 25.67it/s]global step 980000, trans_decision ep_re 3075.0282634626888

{"global_step": 980000, "eval_re": [3075.0282634626888, 3075.0282634626888, 
3075.0282634626888, 3075.0282634626888, 3075.0282634626888, 3075.0282634626888, 
3075.0282634626888, 3075.0282634626888, 3075.0282634626888, 3075.0282634626888],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989998/1000000 [14:48:00<06:21, 26.23it/s]global step 990000, trans_decision ep_re 1294.110558608107

{"global_step": 990000, "eval_re": [1294.1105586081073, 1294.1105586081073, 
1294.1105586081073, 1294.1105586081073, 1294.1105586081073, 1294.1105586081073, 
1294.1105586081073, 1294.1105586081073, 1294.1105586081073, 1294.1105586081073],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999998/1000000 [14:57:00<00:00, 26.31it/s]global step 1000000, trans_decision ep_re 1404.0525992510297

{"global_step": 1000000, "eval_re": [1404.0525992510297, 1404.0525992510297, 
1404.0525992510297, 1404.0525992510297, 1404.0525992510297, 1404.0525992510297, 
1404.0525992510297, 1404.0525992510297, 1404.0525992510297, 1404.0525992510297],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [14:57:36<00:00, 18.57it/s]
