
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.0
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [04:30<10:38:15, 25.85it/s]global step 10000, trans_decision ep_re 56.058911085047995

{"global_step": 10000, "eval_re": [56.058911085048, 56.058911085048, 
56.058911085048, 56.058911085048, 56.058911085048, 56.058911085048, 
56.058911085048, 56.058911085048, 56.058911085048, 56.058911085048], "eval_len":
[1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19998/1000000 [13:40<10:46:46, 25.25it/s]global step 20000, trans_decision ep_re 850.6618656037859

{"global_step": 20000, "eval_re": [850.6618656037859, 850.6618656037859, 
850.6618656037859, 850.6618656037859, 850.6618656037859, 850.6618656037859, 
850.6618656037859, 850.6618656037859, 850.6618656037859, 850.6618656037859], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29999/1000000 [23:00<10:31:01, 25.62it/s]global step 30000, trans_decision ep_re 722.5896701717418

{"global_step": 30000, "eval_re": [722.5896701717417, 722.5896701717417, 
722.5896701717417, 722.5896701717417, 722.5896701717417, 722.5896701717417, 
722.5896701717417, 722.5896701717417, 722.5896701717417, 722.5896701717417], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39999/1000000 [32:10<10:33:34, 25.25it/s]global step 40000, trans_decision ep_re 1121.8093199633022

{"global_step": 40000, "eval_re": [1121.8093199633022, 1121.8093199633022, 
1121.8093199633022, 1121.8093199633022, 1121.8093199633022, 1121.8093199633022, 
1121.8093199633022, 1121.8093199633022, 1121.8093199633022, 1121.8093199633022],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49999/1000000 [41:20<10:26:00, 25.29it/s]global step 50000, trans_decision ep_re 1173.1368290156447

{"global_step": 50000, "eval_re": [1173.1368290156447, 1173.1368290156447, 
1173.1368290156447, 1173.1368290156447, 1173.1368290156447, 1173.1368290156447, 
1173.1368290156447, 1173.1368290156447, 1173.1368290156447, 1173.1368290156447],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59999/1000000 [50:30<10:26:40, 25.00it/s]global step 60000, trans_decision ep_re 4119.47752722334

{"global_step": 60000, "eval_re": [4119.477527223341, 4119.477527223341, 
4119.477527223341, 4119.477527223341, 4119.477527223341, 4119.477527223341, 
4119.477527223341, 4119.477527223341, 4119.477527223341, 4119.477527223341], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69997/1000000 [59:40<10:03:01, 25.70it/s]global step 70000, trans_decision ep_re 1657.9625771004255

{"global_step": 70000, "eval_re": [1657.9625771004257, 1657.9625771004257, 
1657.9625771004257, 1657.9625771004257, 1657.9625771004257, 1657.9625771004257, 
1657.9625771004257, 1657.9625771004257, 1657.9625771004257, 1657.9625771004257],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79999/1000000 [1:08:50<10:09:57, 25.14it/s]global step 80000, trans_decision ep_re 2411.497850401393

{"global_step": 80000, "eval_re": [2411.497850401393, 2411.497850401393, 
2411.497850401393, 2411.497850401393, 2411.497850401393, 2411.497850401393, 
2411.497850401393, 2411.497850401393, 2411.497850401393, 2411.497850401393], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89998/1000000 [1:17:50<9:57:05, 25.40it/s]global step 90000, trans_decision ep_re 1425.0631125929299

{"global_step": 90000, "eval_re": [1425.0631125929297, 1425.0631125929297, 
1425.0631125929297, 1425.0631125929297, 1425.0631125929297, 1425.0631125929297, 
1425.0631125929297, 1425.0631125929297, 1425.0631125929297, 1425.0631125929297],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99997/1000000 [1:27:00<9:41:21, 25.80it/s]global step 100000, trans_decision ep_re 1169.7657244511206

{"global_step": 100000, "eval_re": [1169.7657244511204, 1169.7657244511204, 
1169.7657244511204, 1169.7657244511204, 1169.7657244511204, 1169.7657244511204, 
1169.7657244511204, 1169.7657244511204, 1169.7657244511204, 1169.7657244511204],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109999/1000000 [1:36:10<9:33:29, 25.87it/s]global step 110000, trans_decision ep_re 4833.976151771274

{"global_step": 110000, "eval_re": [4833.976151771273, 4833.976151771273, 
4833.976151771273, 4833.976151771273, 4833.976151771273, 4833.976151771273, 
4833.976151771273, 4833.976151771273, 4833.976151771273, 4833.976151771273], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119997/1000000 [1:45:10<9:35:01, 25.51it/s]global step 120000, trans_decision ep_re 1544.227927742911

{"global_step": 120000, "eval_re": [1544.227927742911, 1544.227927742911, 
1544.227927742911, 1544.227927742911, 1544.227927742911, 1544.227927742911, 
1544.227927742911, 1544.227927742911, 1544.227927742911, 1544.227927742911], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129999/1000000 [1:54:20<9:24:50, 25.67it/s]global step 130000, trans_decision ep_re 1826.8326529408744

{"global_step": 130000, "eval_re": [1826.8326529408744, 1826.8326529408744, 
1826.8326529408744, 1826.8326529408744, 1826.8326529408744, 1826.8326529408744, 
1826.8326529408744, 1826.8326529408744, 1826.8326529408744, 1826.8326529408744],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139999/1000000 [2:03:40<9:27:31, 25.26it/s]global step 140000, trans_decision ep_re 2069.0792005731246

{"global_step": 140000, "eval_re": [2069.0792005731246, 2069.0792005731246, 
2069.0792005731246, 2069.0792005731246, 2069.0792005731246, 2069.0792005731246, 
2069.0792005731246, 2069.0792005731246, 2069.0792005731246, 2069.0792005731246],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149997/1000000 [2:12:40<9:14:36, 25.54it/s]global step 150000, trans_decision ep_re 4469.539635880619

{"global_step": 150000, "eval_re": [4469.539635880619, 4469.539635880619, 
4469.539635880619, 4469.539635880619, 4469.539635880619, 4469.539635880619, 
4469.539635880619, 4469.539635880619, 4469.539635880619, 4469.539635880619], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159999/1000000 [2:21:50<9:06:47, 25.60it/s]global step 160000, trans_decision ep_re 1646.881759541068

{"global_step": 160000, "eval_re": [1646.881759541068, 1646.881759541068, 
1646.881759541068, 1646.881759541068, 1646.881759541068, 1646.881759541068, 
1646.881759541068, 1646.881759541068, 1646.881759541068, 1646.881759541068], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169999/1000000 [2:31:00<9:10:36, 25.12it/s]global step 170000, trans_decision ep_re 1212.2725997787163

{"global_step": 170000, "eval_re": [1212.2725997787163, 1212.2725997787163, 
1212.2725997787163, 1212.2725997787163, 1212.2725997787163, 1212.2725997787163, 
1212.2725997787163, 1212.2725997787163, 1212.2725997787163, 1212.2725997787163],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179997/1000000 [2:40:00<8:57:07, 25.44it/s]global step 180000, trans_decision ep_re 1310.525017424216

{"global_step": 180000, "eval_re": [1310.525017424216, 1310.525017424216, 
1310.525017424216, 1310.525017424216, 1310.525017424216, 1310.525017424216, 
1310.525017424216, 1310.525017424216, 1310.525017424216, 1310.525017424216], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189998/1000000 [2:49:20<8:37:18, 26.10it/s]global step 190000, trans_decision ep_re 4976.10068788995

{"global_step": 190000, "eval_re": [4976.10068788995, 4976.10068788995, 
4976.10068788995, 4976.10068788995, 4976.10068788995, 4976.10068788995, 
4976.10068788995, 4976.10068788995, 4976.10068788995, 4976.10068788995], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199998/1000000 [2:58:20<8:47:32, 25.27it/s]global step 200000, trans_decision ep_re 4579.992550228475

{"global_step": 200000, "eval_re": [4579.992550228475, 4579.992550228475, 
4579.992550228475, 4579.992550228475, 4579.992550228475, 4579.992550228475, 
4579.992550228475, 4579.992550228475, 4579.992550228475, 4579.992550228475], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209999/1000000 [3:07:30<8:41:03, 25.27it/s]global step 210000, trans_decision ep_re 4867.994851428384

{"global_step": 210000, "eval_re": [4867.994851428385, 4867.994851428385, 
4867.994851428385, 4867.994851428385, 4867.994851428385, 4867.994851428385, 
4867.994851428385, 4867.994851428385, 4867.994851428385, 4867.994851428385], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219997/1000000 [3:16:40<8:30:00, 25.49it/s]global step 220000, trans_decision ep_re 4637.903744419288

{"global_step": 220000, "eval_re": [4637.903744419288, 4637.903744419288, 
4637.903744419288, 4637.903744419288, 4637.903744419288, 4637.903744419288, 
4637.903744419288, 4637.903744419288, 4637.903744419288, 4637.903744419288], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229997/1000000 [3:25:50<8:20:26, 25.64it/s]global step 230000, trans_decision ep_re 1286.990178335313

{"global_step": 230000, "eval_re": [1286.990178335313, 1286.990178335313, 
1286.990178335313, 1286.990178335313, 1286.990178335313, 1286.990178335313, 
1286.990178335313, 1286.990178335313, 1286.990178335313, 1286.990178335313], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239998/1000000 [3:35:00<8:06:50, 26.02it/s]global step 240000, trans_decision ep_re 4874.698999431597

{"global_step": 240000, "eval_re": [4874.698999431597, 4874.698999431597, 
4874.698999431597, 4874.698999431597, 4874.698999431597, 4874.698999431597, 
4874.698999431597, 4874.698999431597, 4874.698999431597, 4874.698999431597], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249999/1000000 [3:44:10<8:08:32, 25.59it/s]global step 250000, trans_decision ep_re 4783.022986161554

{"global_step": 250000, "eval_re": [4783.022986161555, 4783.022986161555, 
4783.022986161555, 4783.022986161555, 4783.022986161555, 4783.022986161555, 
4783.022986161555, 4783.022986161555, 4783.022986161555, 4783.022986161555], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259998/1000000 [3:53:20<7:45:57, 26.47it/s]global step 260000, trans_decision ep_re 1976.0728855841141

{"global_step": 260000, "eval_re": [1976.0728855841141, 1976.0728855841141, 
1976.0728855841141, 1976.0728855841141, 1976.0728855841141, 1976.0728855841141, 
1976.0728855841141, 1976.0728855841141, 1976.0728855841141, 1976.0728855841141],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269999/1000000 [4:02:20<7:58:53, 25.41it/s]global step 270000, trans_decision ep_re 4538.002984459346

{"global_step": 270000, "eval_re": [4538.002984459346, 4538.002984459346, 
4538.002984459346, 4538.002984459346, 4538.002984459346, 4538.002984459346, 
4538.002984459346, 4538.002984459346, 4538.002984459346, 4538.002984459346], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279998/1000000 [4:11:30<7:48:42, 25.60it/s]global step 280000, trans_decision ep_re 4652.386723354824

{"global_step": 280000, "eval_re": [4652.386723354824, 4652.386723354824, 
4652.386723354824, 4652.386723354824, 4652.386723354824, 4652.386723354824, 
4652.386723354824, 4652.386723354824, 4652.386723354824, 4652.386723354824], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289998/1000000 [4:20:40<7:35:56, 25.95it/s]global step 290000, trans_decision ep_re 4965.225162890487

{"global_step": 290000, "eval_re": [4965.225162890488, 4965.225162890488, 
4965.225162890488, 4965.225162890488, 4965.225162890488, 4965.225162890488, 
4965.225162890488, 4965.225162890488, 4965.225162890488, 4965.225162890488], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299999/1000000 [4:30:00<7:35:16, 25.63it/s]global step 300000, trans_decision ep_re 2857.6511615335394

{"global_step": 300000, "eval_re": [2857.6511615335394, 2857.6511615335394, 
2857.6511615335394, 2857.6511615335394, 2857.6511615335394, 2857.6511615335394, 
2857.6511615335394, 2857.6511615335394, 2857.6511615335394, 2857.6511615335394],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309997/1000000 [4:39:00<7:34:51, 25.28it/s]global step 310000, trans_decision ep_re 1833.1940024554085

{"global_step": 310000, "eval_re": [1833.1940024554085, 1833.1940024554085, 
1833.1940024554085, 1833.1940024554085, 1833.1940024554085, 1833.1940024554085, 
1833.1940024554085, 1833.1940024554085, 1833.1940024554085, 1833.1940024554085],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319999/1000000 [4:48:10<7:35:51, 24.86it/s]global step 320000, trans_decision ep_re 2214.011775682221

{"global_step": 320000, "eval_re": [2214.0117756822206, 2214.0117756822206, 
2214.0117756822206, 2214.0117756822206, 2214.0117756822206, 2214.0117756822206, 
2214.0117756822206, 2214.0117756822206, 2214.0117756822206, 2214.0117756822206],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329999/1000000 [4:57:20<7:10:51, 25.92it/s]global step 330000, trans_decision ep_re 1650.16398505577

{"global_step": 330000, "eval_re": [1650.16398505577, 1650.16398505577, 
1650.16398505577, 1650.16398505577, 1650.16398505577, 1650.16398505577, 
1650.16398505577, 1650.16398505577, 1650.16398505577, 1650.16398505577], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339997/1000000 [5:06:30<7:15:23, 25.27it/s]global step 340000, trans_decision ep_re 2912.3828984668044

{"global_step": 340000, "eval_re": [2912.3828984668044, 2912.3828984668044, 
2912.3828984668044, 2912.3828984668044, 2912.3828984668044, 2912.3828984668044, 
2912.3828984668044, 2912.3828984668044, 2912.3828984668044, 2912.3828984668044],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349998/1000000 [5:15:40<7:02:06, 25.66it/s]global step 350000, trans_decision ep_re 4488.993133938487

{"global_step": 350000, "eval_re": [4488.993133938487, 4488.993133938487, 
4488.993133938487, 4488.993133938487, 4488.993133938487, 4488.993133938487, 
4488.993133938487, 4488.993133938487, 4488.993133938487, 4488.993133938487], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359999/1000000 [5:25:00<6:51:05, 25.95it/s]global step 360000, trans_decision ep_re 2541.6114876460347

{"global_step": 360000, "eval_re": [2541.6114876460347, 2541.6114876460347, 
2541.6114876460347, 2541.6114876460347, 2541.6114876460347, 2541.6114876460347, 
2541.6114876460347, 2541.6114876460347, 2541.6114876460347, 2541.6114876460347],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369999/1000000 [5:34:10<7:08:38, 24.50it/s]global step 370000, trans_decision ep_re 2298.2185363597964

{"global_step": 370000, "eval_re": [2298.2185363597964, 2298.2185363597964, 
2298.2185363597964, 2298.2185363597964, 2298.2185363597964, 2298.2185363597964, 
2298.2185363597964, 2298.2185363597964, 2298.2185363597964, 2298.2185363597964],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379999/1000000 [5:43:20<6:50:00, 25.20it/s]global step 380000, trans_decision ep_re 2052.609044793201

{"global_step": 380000, "eval_re": [2052.609044793201, 2052.609044793201, 
2052.609044793201, 2052.609044793201, 2052.609044793201, 2052.609044793201, 
2052.609044793201, 2052.609044793201, 2052.609044793201, 2052.609044793201], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389998/1000000 [5:52:30<6:33:54, 25.81it/s]global step 390000, trans_decision ep_re 1624.1210844683296

{"global_step": 390000, "eval_re": [1624.1210844683296, 1624.1210844683296, 
1624.1210844683296, 1624.1210844683296, 1624.1210844683296, 1624.1210844683296, 
1624.1210844683296, 1624.1210844683296, 1624.1210844683296, 1624.1210844683296],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [6:01:40<6:35:52, 25.26it/s]global step 400000, trans_decision ep_re 1217.1418008217004

{"global_step": 400000, "eval_re": [1217.1418008217004, 1217.1418008217004, 
1217.1418008217004, 1217.1418008217004, 1217.1418008217004, 1217.1418008217004, 
1217.1418008217004, 1217.1418008217004, 1217.1418008217004, 1217.1418008217004],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409998/1000000 [6:11:00<6:21:21, 25.78it/s]global step 410000, trans_decision ep_re 1155.8704872579306

{"global_step": 410000, "eval_re": [1155.8704872579306, 1155.8704872579306, 
1155.8704872579306, 1155.8704872579306, 1155.8704872579306, 1155.8704872579306, 
1155.8704872579306, 1155.8704872579306, 1155.8704872579306, 1155.8704872579306],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419999/1000000 [6:20:10<6:16:27, 25.68it/s]global step 420000, trans_decision ep_re 1701.8638764812745

{"global_step": 420000, "eval_re": [1701.8638764812745, 1701.8638764812745, 
1701.8638764812745, 1701.8638764812745, 1701.8638764812745, 1701.8638764812745, 
1701.8638764812745, 1701.8638764812745, 1701.8638764812745, 1701.8638764812745],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429999/1000000 [6:29:20<6:06:28, 25.92it/s]global step 430000, trans_decision ep_re 2238.3588450584825

{"global_step": 430000, "eval_re": [2238.3588450584825, 2238.3588450584825, 
2238.3588450584825, 2238.3588450584825, 2238.3588450584825, 2238.3588450584825, 
2238.3588450584825, 2238.3588450584825, 2238.3588450584825, 2238.3588450584825],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439997/1000000 [6:38:20<6:06:09, 25.49it/s]global step 440000, trans_decision ep_re 2642.2766155837207

{"global_step": 440000, "eval_re": [2642.2766155837207, 2642.2766155837207, 
2642.2766155837207, 2642.2766155837207, 2642.2766155837207, 2642.2766155837207, 
2642.2766155837207, 2642.2766155837207, 2642.2766155837207, 2642.2766155837207],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449997/1000000 [6:47:40<5:56:13, 25.73it/s]global step 450000, trans_decision ep_re 1299.694556116228

{"global_step": 450000, "eval_re": [1299.6945561162277, 1299.6945561162277, 
1299.6945561162277, 1299.6945561162277, 1299.6945561162277, 1299.6945561162277, 
1299.6945561162277, 1299.6945561162277, 1299.6945561162277, 1299.6945561162277],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459999/1000000 [6:56:50<5:50:20, 25.69it/s]global step 460000, trans_decision ep_re 2824.3389784468336

{"global_step": 460000, "eval_re": [2824.3389784468336, 2824.3389784468336, 
2824.3389784468336, 2824.3389784468336, 2824.3389784468336, 2824.3389784468336, 
2824.3389784468336, 2824.3389784468336, 2824.3389784468336, 2824.3389784468336],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469998/1000000 [7:05:50<5:41:09, 25.89it/s]global step 470000, trans_decision ep_re 1251.8618152320464

{"global_step": 470000, "eval_re": [1251.8618152320464, 1251.8618152320464, 
1251.8618152320464, 1251.8618152320464, 1251.8618152320464, 1251.8618152320464, 
1251.8618152320464, 1251.8618152320464, 1251.8618152320464, 1251.8618152320464],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479998/1000000 [7:15:10<5:33:19, 26.00it/s]global step 480000, trans_decision ep_re 1662.8529999664006

{"global_step": 480000, "eval_re": [1662.8529999664006, 1662.8529999664006, 
1662.8529999664006, 1662.8529999664006, 1662.8529999664006, 1662.8529999664006, 
1662.8529999664006, 1662.8529999664006, 1662.8529999664006, 1662.8529999664006],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489999/1000000 [7:24:20<5:27:32, 25.95it/s]global step 490000, trans_decision ep_re 1604.0509920751194

{"global_step": 490000, "eval_re": [1604.0509920751194, 1604.0509920751194, 
1604.0509920751194, 1604.0509920751194, 1604.0509920751194, 1604.0509920751194, 
1604.0509920751194, 1604.0509920751194, 1604.0509920751194, 1604.0509920751194],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499997/1000000 [7:33:30<5:26:36, 25.51it/s]global step 500000, trans_decision ep_re 1629.7258303697781

{"global_step": 500000, "eval_re": [1629.7258303697781, 1629.7258303697781, 
1629.7258303697781, 1629.7258303697781, 1629.7258303697781, 1629.7258303697781, 
1629.7258303697781, 1629.7258303697781, 1629.7258303697781, 1629.7258303697781],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509997/1000000 [7:42:40<5:23:56, 25.21it/s]global step 510000, trans_decision ep_re 1260.219347213746

{"global_step": 510000, "eval_re": [1260.219347213746, 1260.219347213746, 
1260.219347213746, 1260.219347213746, 1260.219347213746, 1260.219347213746, 
1260.219347213746, 1260.219347213746, 1260.219347213746, 1260.219347213746], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519999/1000000 [7:52:00<5:10:28, 25.77it/s]global step 520000, trans_decision ep_re 1489.2971759846073

{"global_step": 520000, "eval_re": [1489.297175984607, 1489.297175984607, 
1489.297175984607, 1489.297175984607, 1489.297175984607, 1489.297175984607, 
1489.297175984607, 1489.297175984607, 1489.297175984607, 1489.297175984607], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529998/1000000 [8:01:00<5:02:26, 25.90it/s]global step 530000, trans_decision ep_re 3017.394054873516

{"global_step": 530000, "eval_re": [3017.394054873516, 3017.394054873516, 
3017.394054873516, 3017.394054873516, 3017.394054873516, 3017.394054873516, 
3017.394054873516, 3017.394054873516, 3017.394054873516, 3017.394054873516], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539998/1000000 [8:10:10<4:54:15, 26.05it/s]global step 540000, trans_decision ep_re 2367.732490253867

{"global_step": 540000, "eval_re": [2367.732490253867, 2367.732490253867, 
2367.732490253867, 2367.732490253867, 2367.732490253867, 2367.732490253867, 
2367.732490253867, 2367.732490253867, 2367.732490253867, 2367.732490253867], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549999/1000000 [8:19:10<4:55:46, 25.36it/s]global step 550000, trans_decision ep_re 1914.4113872665791

{"global_step": 550000, "eval_re": [1914.411387266579, 1914.411387266579, 
1914.411387266579, 1914.411387266579, 1914.411387266579, 1914.411387266579, 
1914.411387266579, 1914.411387266579, 1914.411387266579, 1914.411387266579], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559998/1000000 [8:28:20<4:44:19, 25.79it/s]global step 560000, trans_decision ep_re 1301.9127072113886

{"global_step": 560000, "eval_re": [1301.9127072113888, 1301.9127072113888, 
1301.9127072113888, 1301.9127072113888, 1301.9127072113888, 1301.9127072113888, 
1301.9127072113888, 1301.9127072113888, 1301.9127072113888, 1301.9127072113888],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569999/1000000 [8:37:30<4:42:48, 25.34it/s]global step 570000, trans_decision ep_re 2366.0656915742725

{"global_step": 570000, "eval_re": [2366.0656915742725, 2366.0656915742725, 
2366.0656915742725, 2366.0656915742725, 2366.0656915742725, 2366.0656915742725, 
2366.0656915742725, 2366.0656915742725, 2366.0656915742725, 2366.0656915742725],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579998/1000000 [8:46:30<4:34:15, 25.52it/s]global step 580000, trans_decision ep_re 2773.4034795870252

{"global_step": 580000, "eval_re": [2773.4034795870252, 2773.4034795870252, 
2773.4034795870252, 2773.4034795870252, 2773.4034795870252, 2773.4034795870252, 
2773.4034795870252, 2773.4034795870252, 2773.4034795870252, 2773.4034795870252],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589997/1000000 [8:55:40<4:24:28, 25.84it/s]global step 590000, trans_decision ep_re 1299.6114259457247

{"global_step": 590000, "eval_re": [1299.6114259457247, 1299.6114259457247, 
1299.6114259457247, 1299.6114259457247, 1299.6114259457247, 1299.6114259457247, 
1299.6114259457247, 1299.6114259457247, 1299.6114259457247, 1299.6114259457247],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599999/1000000 [9:04:50<4:20:14, 25.62it/s]global step 600000, trans_decision ep_re 1914.8405987377223

{"global_step": 600000, "eval_re": [1914.8405987377223, 1914.8405987377223, 
1914.8405987377223, 1914.8405987377223, 1914.8405987377223, 1914.8405987377223, 
1914.8405987377223, 1914.8405987377223, 1914.8405987377223, 1914.8405987377223],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609999/1000000 [9:13:50<4:16:40, 25.32it/s]global step 610000, trans_decision ep_re 2660.7943637712024

{"global_step": 610000, "eval_re": [2660.794363771203, 2660.794363771203, 
2660.794363771203, 2660.794363771203, 2660.794363771203, 2660.794363771203, 
2660.794363771203, 2660.794363771203, 2660.794363771203, 2660.794363771203], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619999/1000000 [9:23:00<4:09:03, 25.43it/s]global step 620000, trans_decision ep_re 1571.2875681408075

{"global_step": 620000, "eval_re": [1571.2875681408075, 1571.2875681408075, 
1571.2875681408075, 1571.2875681408075, 1571.2875681408075, 1571.2875681408075, 
1571.2875681408075, 1571.2875681408075, 1571.2875681408075, 1571.2875681408075],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629997/1000000 [9:32:00<4:04:05, 25.26it/s]global step 630000, trans_decision ep_re 1798.7709562269915

{"global_step": 630000, "eval_re": [1798.7709562269915, 1798.7709562269915, 
1798.7709562269915, 1798.7709562269915, 1798.7709562269915, 1798.7709562269915, 
1798.7709562269915, 1798.7709562269915, 1798.7709562269915, 1798.7709562269915],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639999/1000000 [9:41:10<3:56:39, 25.35it/s]global step 640000, trans_decision ep_re 2812.7663506906065

{"global_step": 640000, "eval_re": [2812.7663506906065, 2812.7663506906065, 
2812.7663506906065, 2812.7663506906065, 2812.7663506906065, 2812.7663506906065, 
2812.7663506906065, 2812.7663506906065, 2812.7663506906065, 2812.7663506906065],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649999/1000000 [9:50:20<3:43:43, 26.07it/s]global step 650000, trans_decision ep_re 1882.9604104670834

{"global_step": 650000, "eval_re": [1882.9604104670834, 1882.9604104670834, 
1882.9604104670834, 1882.9604104670834, 1882.9604104670834, 1882.9604104670834, 
1882.9604104670834, 1882.9604104670834, 1882.9604104670834, 1882.9604104670834],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659997/1000000 [9:59:20<3:38:42, 25.91it/s]global step 660000, trans_decision ep_re 1404.7869119165257

{"global_step": 660000, "eval_re": [1404.7869119165255, 1404.7869119165255, 
1404.7869119165255, 1404.7869119165255, 1404.7869119165255, 1404.7869119165255, 
1404.7869119165255, 1404.7869119165255, 1404.7869119165255, 1404.7869119165255],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669998/1000000 [10:08:30<3:30:04, 26.18it/s]global step 670000, trans_decision ep_re 1752.4803886520633

{"global_step": 670000, "eval_re": [1752.4803886520633, 1752.4803886520633, 
1752.4803886520633, 1752.4803886520633, 1752.4803886520633, 1752.4803886520633, 
1752.4803886520633, 1752.4803886520633, 1752.4803886520633, 1752.4803886520633],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679997/1000000 [10:17:30<3:24:35, 26.07it/s]global step 680000, trans_decision ep_re 3106.4317275843673

{"global_step": 680000, "eval_re": [3106.4317275843673, 3106.4317275843673, 
3106.4317275843673, 3106.4317275843673, 3106.4317275843673, 3106.4317275843673, 
3106.4317275843673, 3106.4317275843673, 3106.4317275843673, 3106.4317275843673],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689997/1000000 [10:26:40<3:21:03, 25.70it/s]global step 690000, trans_decision ep_re 2610.424345841071

{"global_step": 690000, "eval_re": [2610.424345841071, 2610.424345841071, 
2610.424345841071, 2610.424345841071, 2610.424345841071, 2610.424345841071, 
2610.424345841071, 2610.424345841071, 2610.424345841071, 2610.424345841071], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699998/1000000 [10:35:50<3:13:47, 25.80it/s]global step 700000, trans_decision ep_re 1376.7457776538317

{"global_step": 700000, "eval_re": [1376.7457776538315, 1376.7457776538315, 
1376.7457776538315, 1376.7457776538315, 1376.7457776538315, 1376.7457776538315, 
1376.7457776538315, 1376.7457776538315, 1376.7457776538315, 1376.7457776538315],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709999/1000000 [10:44:50<3:07:35, 25.77it/s]global step 710000, trans_decision ep_re 1454.4145208811112

{"global_step": 710000, "eval_re": [1454.4145208811112, 1454.4145208811112, 
1454.4145208811112, 1454.4145208811112, 1454.4145208811112, 1454.4145208811112, 
1454.4145208811112, 1454.4145208811112, 1454.4145208811112, 1454.4145208811112],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719998/1000000 [10:53:50<2:56:38, 26.42it/s]global step 720000, trans_decision ep_re 1879.6801856469144

{"global_step": 720000, "eval_re": [1879.6801856469147, 1879.6801856469147, 
1879.6801856469147, 1879.6801856469147, 1879.6801856469147, 1879.6801856469147, 
1879.6801856469147, 1879.6801856469147, 1879.6801856469147, 1879.6801856469147],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729999/1000000 [11:02:50<2:54:01, 25.86it/s]global step 730000, trans_decision ep_re 1515.0588592010513

{"global_step": 730000, "eval_re": [1515.058859201051, 1515.058859201051, 
1515.058859201051, 1515.058859201051, 1515.058859201051, 1515.058859201051, 
1515.058859201051, 1515.058859201051, 1515.058859201051, 1515.058859201051], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739998/1000000 [11:12:00<2:45:48, 26.13it/s]global step 740000, trans_decision ep_re 1560.2732267860558

{"global_step": 740000, "eval_re": [1560.2732267860558, 1560.2732267860558, 
1560.2732267860558, 1560.2732267860558, 1560.2732267860558, 1560.2732267860558, 
1560.2732267860558, 1560.2732267860558, 1560.2732267860558, 1560.2732267860558],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749999/1000000 [11:21:10<2:38:39, 26.26it/s]global step 750000, trans_decision ep_re 1641.343726955801

{"global_step": 750000, "eval_re": [1641.3437269558008, 1641.3437269558008, 
1641.3437269558008, 1641.3437269558008, 1641.3437269558008, 1641.3437269558008, 
1641.3437269558008, 1641.3437269558008, 1641.3437269558008, 1641.3437269558008],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759997/1000000 [11:30:10<2:33:16, 26.10it/s]global step 760000, trans_decision ep_re 1543.8257746701347

{"global_step": 760000, "eval_re": [1543.8257746701347, 1543.8257746701347, 
1543.8257746701347, 1543.8257746701347, 1543.8257746701347, 1543.8257746701347, 
1543.8257746701347, 1543.8257746701347, 1543.8257746701347, 1543.8257746701347],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769998/1000000 [11:39:10<2:26:12, 26.22it/s]global step 770000, trans_decision ep_re 1581.7457796639756

{"global_step": 770000, "eval_re": [1581.7457796639756, 1581.7457796639756, 
1581.7457796639756, 1581.7457796639756, 1581.7457796639756, 1581.7457796639756, 
1581.7457796639756, 1581.7457796639756, 1581.7457796639756, 1581.7457796639756],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779998/1000000 [11:48:20<2:21:29, 25.91it/s]global step 780000, trans_decision ep_re 2372.5930030464433

{"global_step": 780000, "eval_re": [2372.5930030464433, 2372.5930030464433, 
2372.5930030464433, 2372.5930030464433, 2372.5930030464433, 2372.5930030464433, 
2372.5930030464433, 2372.5930030464433, 2372.5930030464433, 2372.5930030464433],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789999/1000000 [11:57:20<2:18:14, 25.32it/s]global step 790000, trans_decision ep_re 2279.9799995733547

{"global_step": 790000, "eval_re": [2279.9799995733542, 2279.9799995733542, 
2279.9799995733542, 2279.9799995733542, 2279.9799995733542, 2279.9799995733542, 
2279.9799995733542, 2279.9799995733542, 2279.9799995733542, 2279.9799995733542],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799998/1000000 [12:06:30<2:05:45, 26.51it/s]global step 800000, trans_decision ep_re 1412.7008415586383

{"global_step": 800000, "eval_re": [1412.7008415586383, 1412.7008415586383, 
1412.7008415586383, 1412.7008415586383, 1412.7008415586383, 1412.7008415586383, 
1412.7008415586383, 1412.7008415586383, 1412.7008415586383, 1412.7008415586383],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809999/1000000 [12:15:30<2:03:50, 25.57it/s]global step 810000, trans_decision ep_re 1403.025223531942

{"global_step": 810000, "eval_re": [1403.025223531942, 1403.025223531942, 
1403.025223531942, 1403.025223531942, 1403.025223531942, 1403.025223531942, 
1403.025223531942, 1403.025223531942, 1403.025223531942, 1403.025223531942], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819999/1000000 [12:24:30<1:55:29, 25.97it/s]global step 820000, trans_decision ep_re 2511.118415057049

{"global_step": 820000, "eval_re": [2511.118415057049, 2511.118415057049, 
2511.118415057049, 2511.118415057049, 2511.118415057049, 2511.118415057049, 
2511.118415057049, 2511.118415057049, 2511.118415057049, 2511.118415057049], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829999/1000000 [12:33:30<1:50:54, 25.55it/s]global step 830000, trans_decision ep_re 1856.4431596402133

{"global_step": 830000, "eval_re": [1856.4431596402135, 1856.4431596402135, 
1856.4431596402135, 1856.4431596402135, 1856.4431596402135, 1856.4431596402135, 
1856.4431596402135, 1856.4431596402135, 1856.4431596402135, 1856.4431596402135],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839998/1000000 [12:42:40<1:43:35, 25.74it/s]global step 840000, trans_decision ep_re 1787.1150743127287

{"global_step": 840000, "eval_re": [1787.1150743127287, 1787.1150743127287, 
1787.1150743127287, 1787.1150743127287, 1787.1150743127287, 1787.1150743127287, 
1787.1150743127287, 1787.1150743127287, 1787.1150743127287, 1787.1150743127287],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849999/1000000 [12:51:50<1:36:40, 25.86it/s]global step 850000, trans_decision ep_re 1520.1905746934915

{"global_step": 850000, "eval_re": [1520.1905746934915, 1520.1905746934915, 
1520.1905746934915, 1520.1905746934915, 1520.1905746934915, 1520.1905746934915, 
1520.1905746934915, 1520.1905746934915, 1520.1905746934915, 1520.1905746934915],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859999/1000000 [13:00:50<1:30:58, 25.65it/s]global step 860000, trans_decision ep_re 1717.0316137426548

{"global_step": 860000, "eval_re": [1717.0316137426548, 1717.0316137426548, 
1717.0316137426548, 1717.0316137426548, 1717.0316137426548, 1717.0316137426548, 
1717.0316137426548, 1717.0316137426548, 1717.0316137426548, 1717.0316137426548],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869999/1000000 [13:09:50<1:24:25, 25.66it/s]global step 870000, trans_decision ep_re 1544.649442311201

{"global_step": 870000, "eval_re": [1544.6494423112008, 1544.6494423112008, 
1544.6494423112008, 1544.6494423112008, 1544.6494423112008, 1544.6494423112008, 
1544.6494423112008, 1544.6494423112008, 1544.6494423112008, 1544.6494423112008],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879999/1000000 [13:19:00<1:17:13, 25.90it/s]global step 880000, trans_decision ep_re 1621.2939980476172

{"global_step": 880000, "eval_re": [1621.2939980476172, 1621.2939980476172, 
1621.2939980476172, 1621.2939980476172, 1621.2939980476172, 1621.2939980476172, 
1621.2939980476172, 1621.2939980476172, 1621.2939980476172, 1621.2939980476172],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889997/1000000 [13:28:00<1:10:08, 26.14it/s]global step 890000, trans_decision ep_re 1873.182994686922

{"global_step": 890000, "eval_re": [1873.182994686922, 1873.182994686922, 
1873.182994686922, 1873.182994686922, 1873.182994686922, 1873.182994686922, 
1873.182994686922, 1873.182994686922, 1873.182994686922, 1873.182994686922], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [13:37:00<1:03:34, 26.22it/s]global step 900000, trans_decision ep_re 1489.6172179335133

{"global_step": 900000, "eval_re": [1489.6172179335133, 1489.6172179335133, 
1489.6172179335133, 1489.6172179335133, 1489.6172179335133, 1489.6172179335133, 
1489.6172179335133, 1489.6172179335133, 1489.6172179335133, 1489.6172179335133],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909997/1000000 [13:46:00<58:46, 25.52it/s]global step 910000, trans_decision ep_re -178.9015033500379

{"global_step": 910000, "eval_re": [-178.9015033500379, -178.9015033500379, 
-178.9015033500379, -178.9015033500379, -178.9015033500379, -178.9015033500379, 
-178.9015033500379, -178.9015033500379, -178.9015033500379, -178.9015033500379],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919999/1000000 [13:55:00<52:04, 25.61it/s]global step 920000, trans_decision ep_re 1921.51816078952

{"global_step": 920000, "eval_re": [1921.5181607895197, 1921.5181607895197, 
1921.5181607895197, 1921.5181607895197, 1921.5181607895197, 1921.5181607895197, 
1921.5181607895197, 1921.5181607895197, 1921.5181607895197, 1921.5181607895197],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929997/1000000 [14:04:10<45:43, 25.52it/s]global step 930000, trans_decision ep_re 1975.3586636434145

{"global_step": 930000, "eval_re": [1975.3586636434145, 1975.3586636434145, 
1975.3586636434145, 1975.3586636434145, 1975.3586636434145, 1975.3586636434145, 
1975.3586636434145, 1975.3586636434145, 1975.3586636434145, 1975.3586636434145],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939999/1000000 [14:13:10<38:24, 26.04it/s]global step 940000, trans_decision ep_re 1901.5088663819529

{"global_step": 940000, "eval_re": [1901.508866381953, 1901.508866381953, 
1901.508866381953, 1901.508866381953, 1901.508866381953, 1901.508866381953, 
1901.508866381953, 1901.508866381953, 1901.508866381953, 1901.508866381953], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949999/1000000 [14:22:20<31:48, 26.20it/s]global step 950000, trans_decision ep_re 1516.8989212598829

{"global_step": 950000, "eval_re": [1516.8989212598829, 1516.8989212598829, 
1516.8989212598829, 1516.8989212598829, 1516.8989212598829, 1516.8989212598829, 
1516.8989212598829, 1516.8989212598829, 1516.8989212598829, 1516.8989212598829],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959999/1000000 [14:31:20<25:38, 26.01it/s]global step 960000, trans_decision ep_re 1455.2130977201127

{"global_step": 960000, "eval_re": [1455.213097720113, 1455.213097720113, 
1455.213097720113, 1455.213097720113, 1455.213097720113, 1455.213097720113, 
1455.213097720113, 1455.213097720113, 1455.213097720113, 1455.213097720113], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969999/1000000 [14:40:30<19:26, 25.72it/s]global step 970000, trans_decision ep_re 1983.5871129612995

{"global_step": 970000, "eval_re": [1983.5871129612995, 1983.5871129612995, 
1983.5871129612995, 1983.5871129612995, 1983.5871129612995, 1983.5871129612995, 
1983.5871129612995, 1983.5871129612995, 1983.5871129612995, 1983.5871129612995],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979999/1000000 [14:49:30<13:06, 25.42it/s]global step 980000, trans_decision ep_re 1307.93994759827

{"global_step": 980000, "eval_re": [1307.93994759827, 1307.93994759827, 
1307.93994759827, 1307.93994759827, 1307.93994759827, 1307.93994759827, 
1307.93994759827, 1307.93994759827, 1307.93994759827, 1307.93994759827], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989999/1000000 [14:58:40<06:27, 25.84it/s]global step 990000, trans_decision ep_re 1815.7501234344295

{"global_step": 990000, "eval_re": [1815.7501234344295, 1815.7501234344295, 
1815.7501234344295, 1815.7501234344295, 1815.7501234344295, 1815.7501234344295, 
1815.7501234344295, 1815.7501234344295, 1815.7501234344295, 1815.7501234344295],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999999/1000000 [15:07:50<00:00, 25.51it/s]global step 1000000, trans_decision ep_re 2245.200513866918

{"global_step": 1000000, "eval_re": [2245.2005138669174, 2245.2005138669174, 
2245.2005138669174, 2245.2005138669174, 2245.2005138669174, 2245.2005138669174, 
2245.2005138669174, 2245.2005138669174, 2245.2005138669174, 2245.2005138669174],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [15:08:20<00:00, 18.35it/s]
