
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.0
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9998/1000000 [05:40<13:14:26, 20.77it/s]global step 10000, trans_decision ep_re -132.17283322280224

{"global_step": 10000, "eval_re": [-132.17283322280227, -132.17283322280227, 
-132.17283322280227, -132.17283322280227, -132.17283322280227, 
-132.17283322280227, -132.17283322280227, -132.17283322280227, 
-132.17283322280227, -132.17283322280227], "eval_len": [1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19998/1000000 [17:10<13:01:30, 20.90it/s]global step 20000, trans_decision ep_re 330.4224747240702

{"global_step": 20000, "eval_re": [330.4224747240702, 330.4224747240702, 
330.4224747240702, 330.4224747240702, 330.4224747240702, 330.4224747240702, 
330.4224747240702, 330.4224747240702, 330.4224747240702, 330.4224747240702], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29999/1000000 [28:50<13:11:28, 20.43it/s]global step 30000, trans_decision ep_re 938.5087490740476

{"global_step": 30000, "eval_re": [938.5087490740475, 938.5087490740475, 
938.5087490740475, 938.5087490740475, 938.5087490740475, 938.5087490740475, 
938.5087490740475, 938.5087490740475, 938.5087490740475, 938.5087490740475], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39999/1000000 [40:20<12:54:52, 20.65it/s]global step 40000, trans_decision ep_re 1037.063782006464

{"global_step": 40000, "eval_re": [1037.063782006464, 1037.063782006464, 
1037.063782006464, 1037.063782006464, 1037.063782006464, 1037.063782006464, 
1037.063782006464, 1037.063782006464, 1037.063782006464, 1037.063782006464], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49998/1000000 [51:40<12:43:35, 20.74it/s]global step 50000, trans_decision ep_re 1197.750737974896

{"global_step": 50000, "eval_re": [1197.7507379748959, 1197.7507379748959, 
1197.7507379748959, 1197.7507379748959, 1197.7507379748959, 1197.7507379748959, 
1197.7507379748959, 1197.7507379748959, 1197.7507379748959, 1197.7507379748959],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59999/1000000 [1:03:10<12:34:29, 20.76it/s]global step 60000, trans_decision ep_re 1188.2404576428785

{"global_step": 60000, "eval_re": [1188.2404576428787, 1188.2404576428787, 
1188.2404576428787, 1188.2404576428787, 1188.2404576428787, 1188.2404576428787, 
1188.2404576428787, 1188.2404576428787, 1188.2404576428787, 1188.2404576428787],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69999/1000000 [1:14:40<12:26:07, 20.77it/s]global step 70000, trans_decision ep_re 1663.0290297937458

{"global_step": 70000, "eval_re": [1663.029029793746, 1663.029029793746, 
1663.029029793746, 1663.029029793746, 1663.029029793746, 1663.029029793746, 
1663.029029793746, 1663.029029793746, 1663.029029793746, 1663.029029793746], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79999/1000000 [1:26:10<12:30:37, 20.43it/s]global step 80000, trans_decision ep_re 1691.861645443782

{"global_step": 80000, "eval_re": [1691.861645443782, 1691.861645443782, 
1691.861645443782, 1691.861645443782, 1691.861645443782, 1691.861645443782, 
1691.861645443782, 1691.861645443782, 1691.861645443782, 1691.861645443782], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89999/1000000 [1:37:30<12:16:13, 20.60it/s]global step 90000, trans_decision ep_re 1934.7470258377512

{"global_step": 90000, "eval_re": [1934.747025837751, 1934.747025837751, 
1934.747025837751, 1934.747025837751, 1934.747025837751, 1934.747025837751, 
1934.747025837751, 1934.747025837751, 1934.747025837751, 1934.747025837751], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99999/1000000 [1:48:50<11:55:47, 20.96it/s]global step 100000, trans_decision ep_re 1219.4155589181664

{"global_step": 100000, "eval_re": [1219.4155589181664, 1219.4155589181664, 
1219.4155589181664, 1219.4155589181664, 1219.4155589181664, 1219.4155589181664, 
1219.4155589181664, 1219.4155589181664, 1219.4155589181664, 1219.4155589181664],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109998/1000000 [2:00:00<11:42:11, 21.12it/s]global step 110000, trans_decision ep_re 1239.439768030514

{"global_step": 110000, "eval_re": [1239.439768030514, 1239.439768030514, 
1239.439768030514, 1239.439768030514, 1239.439768030514, 1239.439768030514, 
1239.439768030514, 1239.439768030514, 1239.439768030514, 1239.439768030514], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119999/1000000 [2:11:20<11:45:12, 20.80it/s]global step 120000, trans_decision ep_re 2140.419258784562

{"global_step": 120000, "eval_re": [2140.419258784562, 2140.419258784562, 
2140.419258784562, 2140.419258784562, 2140.419258784562, 2140.419258784562, 
2140.419258784562, 2140.419258784562, 2140.419258784562, 2140.419258784562], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129998/1000000 [2:22:30<11:22:17, 21.25it/s]global step 130000, trans_decision ep_re 1328.2236769324263

{"global_step": 130000, "eval_re": [1328.2236769324263, 1328.2236769324263, 
1328.2236769324263, 1328.2236769324263, 1328.2236769324263, 1328.2236769324263, 
1328.2236769324263, 1328.2236769324263, 1328.2236769324263, 1328.2236769324263],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139999/1000000 [2:33:40<11:22:31, 21.00it/s]global step 140000, trans_decision ep_re 1675.7544435325167

{"global_step": 140000, "eval_re": [1675.7544435325167, 1675.7544435325167, 
1675.7544435325167, 1675.7544435325167, 1675.7544435325167, 1675.7544435325167, 
1675.7544435325167, 1675.7544435325167, 1675.7544435325167, 1675.7544435325167],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149999/1000000 [2:45:00<11:05:43, 21.28it/s]global step 150000, trans_decision ep_re 4860.252239789877

{"global_step": 150000, "eval_re": [4860.252239789877, 4860.252239789877, 
4860.252239789877, 4860.252239789877, 4860.252239789877, 4860.252239789877, 
4860.252239789877, 4860.252239789877, 4860.252239789877, 4860.252239789877], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159998/1000000 [2:56:10<10:55:21, 21.36it/s]global step 160000, trans_decision ep_re 4180.616482132311

{"global_step": 160000, "eval_re": [4180.616482132311, 4180.616482132311, 
4180.616482132311, 4180.616482132311, 4180.616482132311, 4180.616482132311, 
4180.616482132311, 4180.616482132311, 4180.616482132311, 4180.616482132311], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169998/1000000 [3:07:20<10:57:51, 21.03it/s]global step 170000, trans_decision ep_re 4286.509095987303

{"global_step": 170000, "eval_re": [4286.509095987303, 4286.509095987303, 
4286.509095987303, 4286.509095987303, 4286.509095987303, 4286.509095987303, 
4286.509095987303, 4286.509095987303, 4286.509095987303, 4286.509095987303], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179999/1000000 [3:18:30<10:40:41, 21.33it/s]global step 180000, trans_decision ep_re 5059.9250516404745

{"global_step": 180000, "eval_re": [5059.925051640475, 5059.925051640475, 
5059.925051640475, 5059.925051640475, 5059.925051640475, 5059.925051640475, 
5059.925051640475, 5059.925051640475, 5059.925051640475, 5059.925051640475], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189999/1000000 [3:29:50<10:45:28, 20.91it/s]global step 190000, trans_decision ep_re 4998.475148583498

{"global_step": 190000, "eval_re": [4998.475148583497, 4998.475148583497, 
4998.475148583497, 4998.475148583497, 4998.475148583497, 4998.475148583497, 
4998.475148583497, 4998.475148583497, 4998.475148583497, 4998.475148583497], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199999/1000000 [3:41:00<10:37:20, 20.92it/s]global step 200000, trans_decision ep_re 2748.362287273922

{"global_step": 200000, "eval_re": [2748.362287273922, 2748.362287273922, 
2748.362287273922, 2748.362287273922, 2748.362287273922, 2748.362287273922, 
2748.362287273922, 2748.362287273922, 2748.362287273922, 2748.362287273922], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209999/1000000 [3:52:10<10:29:09, 20.93it/s]global step 210000, trans_decision ep_re 2355.0557927320397

{"global_step": 210000, "eval_re": [2355.0557927320397, 2355.0557927320397, 
2355.0557927320397, 2355.0557927320397, 2355.0557927320397, 2355.0557927320397, 
2355.0557927320397, 2355.0557927320397, 2355.0557927320397, 2355.0557927320397],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219998/1000000 [4:03:20<10:11:33, 21.26it/s]global step 220000, trans_decision ep_re 1607.2004482053112

{"global_step": 220000, "eval_re": [1607.2004482053114, 1607.2004482053114, 
1607.2004482053114, 1607.2004482053114, 1607.2004482053114, 1607.2004482053114, 
1607.2004482053114, 1607.2004482053114, 1607.2004482053114, 1607.2004482053114],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229999/1000000 [4:14:40<10:06:26, 21.16it/s]global step 230000, trans_decision ep_re 1654.0809371934458

{"global_step": 230000, "eval_re": [1654.0809371934458, 1654.0809371934458, 
1654.0809371934458, 1654.0809371934458, 1654.0809371934458, 1654.0809371934458, 
1654.0809371934458, 1654.0809371934458, 1654.0809371934458, 1654.0809371934458],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239999/1000000 [4:25:50<10:00:51, 21.08it/s]global step 240000, trans_decision ep_re 1885.4968869478569

{"global_step": 240000, "eval_re": [1885.4968869478569, 1885.4968869478569, 
1885.4968869478569, 1885.4968869478569, 1885.4968869478569, 1885.4968869478569, 
1885.4968869478569, 1885.4968869478569, 1885.4968869478569, 1885.4968869478569],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249999/1000000 [4:37:00<9:55:47, 20.98it/s]global step 250000, trans_decision ep_re 1205.2819746059072

{"global_step": 250000, "eval_re": [1205.2819746059072, 1205.2819746059072, 
1205.2819746059072, 1205.2819746059072, 1205.2819746059072, 1205.2819746059072, 
1205.2819746059072, 1205.2819746059072, 1205.2819746059072, 1205.2819746059072],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259999/1000000 [4:48:10<9:46:19, 21.04it/s]global step 260000, trans_decision ep_re 2582.1695989494915

{"global_step": 260000, "eval_re": [2582.1695989494915, 2582.1695989494915, 
2582.1695989494915, 2582.1695989494915, 2582.1695989494915, 2582.1695989494915, 
2582.1695989494915, 2582.1695989494915, 2582.1695989494915, 2582.1695989494915],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269999/1000000 [4:59:20<9:44:29, 20.82it/s]global step 270000, trans_decision ep_re 3965.3834850488543

{"global_step": 270000, "eval_re": [3965.383485048854, 3965.383485048854, 
3965.383485048854, 3965.383485048854, 3965.383485048854, 3965.383485048854, 
3965.383485048854, 3965.383485048854, 3965.383485048854, 3965.383485048854], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279999/1000000 [5:10:30<9:35:28, 20.85it/s]global step 280000, trans_decision ep_re 3618.9397367857227

{"global_step": 280000, "eval_re": [3618.939736785723, 3618.939736785723, 
3618.939736785723, 3618.939736785723, 3618.939736785723, 3618.939736785723, 
3618.939736785723, 3618.939736785723, 3618.939736785723, 3618.939736785723], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289999/1000000 [5:21:50<9:20:36, 21.11it/s]global step 290000, trans_decision ep_re 1340.8488991790675

{"global_step": 290000, "eval_re": [1340.8488991790675, 1340.8488991790675, 
1340.8488991790675, 1340.8488991790675, 1340.8488991790675, 1340.8488991790675, 
1340.8488991790675, 1340.8488991790675, 1340.8488991790675, 1340.8488991790675],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299999/1000000 [5:33:00<9:14:26, 21.04it/s]global step 300000, trans_decision ep_re 1768.3836793981168

{"global_step": 300000, "eval_re": [1768.383679398117, 1768.383679398117, 
1768.383679398117, 1768.383679398117, 1768.383679398117, 1768.383679398117, 
1768.383679398117, 1768.383679398117, 1768.383679398117, 1768.383679398117], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309998/1000000 [5:44:10<9:05:46, 21.07it/s]global step 310000, trans_decision ep_re 1586.642390825373

{"global_step": 310000, "eval_re": [1586.642390825373, 1586.642390825373, 
1586.642390825373, 1586.642390825373, 1586.642390825373, 1586.642390825373, 
1586.642390825373, 1586.642390825373, 1586.642390825373, 1586.642390825373], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319999/1000000 [5:55:30<8:59:39, 21.00it/s]global step 320000, trans_decision ep_re 3037.4745518398045

{"global_step": 320000, "eval_re": [3037.4745518398045, 3037.4745518398045, 
3037.4745518398045, 3037.4745518398045, 3037.4745518398045, 3037.4745518398045, 
3037.4745518398045, 3037.4745518398045, 3037.4745518398045, 3037.4745518398045],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329998/1000000 [6:06:40<8:41:35, 21.41it/s]global step 330000, trans_decision ep_re 2489.546051199886

{"global_step": 330000, "eval_re": [2489.546051199886, 2489.546051199886, 
2489.546051199886, 2489.546051199886, 2489.546051199886, 2489.546051199886, 
2489.546051199886, 2489.546051199886, 2489.546051199886, 2489.546051199886], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339999/1000000 [6:18:00<8:40:26, 21.14it/s]global step 340000, trans_decision ep_re 4672.550127170552

{"global_step": 340000, "eval_re": [4672.550127170552, 4672.550127170552, 
4672.550127170552, 4672.550127170552, 4672.550127170552, 4672.550127170552, 
4672.550127170552, 4672.550127170552, 4672.550127170552, 4672.550127170552], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349998/1000000 [6:29:10<8:24:17, 21.48it/s]global step 350000, trans_decision ep_re 2345.044885324157

{"global_step": 350000, "eval_re": [2345.0448853241564, 2345.0448853241564, 
2345.0448853241564, 2345.0448853241564, 2345.0448853241564, 2345.0448853241564, 
2345.0448853241564, 2345.0448853241564, 2345.0448853241564, 2345.0448853241564],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359999/1000000 [6:40:30<8:27:43, 21.01it/s]global step 360000, trans_decision ep_re 3500.4323866944005

{"global_step": 360000, "eval_re": [3500.4323866944005, 3500.4323866944005, 
3500.4323866944005, 3500.4323866944005, 3500.4323866944005, 3500.4323866944005, 
3500.4323866944005, 3500.4323866944005, 3500.4323866944005, 3500.4323866944005],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369998/1000000 [6:51:40<8:18:28, 21.06it/s]global step 370000, trans_decision ep_re 1628.8013239320303

{"global_step": 370000, "eval_re": [1628.8013239320303, 1628.8013239320303, 
1628.8013239320303, 1628.8013239320303, 1628.8013239320303, 1628.8013239320303, 
1628.8013239320303, 1628.8013239320303, 1628.8013239320303, 1628.8013239320303],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379999/1000000 [7:03:00<8:15:40, 20.85it/s]global step 380000, trans_decision ep_re 4422.523783714552

{"global_step": 380000, "eval_re": [4422.523783714551, 4422.523783714551, 
4422.523783714551, 4422.523783714551, 4422.523783714551, 4422.523783714551, 
4422.523783714551, 4422.523783714551, 4422.523783714551, 4422.523783714551], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389998/1000000 [7:14:10<7:57:32, 21.29it/s]global step 390000, trans_decision ep_re 4722.789646979197

{"global_step": 390000, "eval_re": [4722.789646979197, 4722.789646979197, 
4722.789646979197, 4722.789646979197, 4722.789646979197, 4722.789646979197, 
4722.789646979197, 4722.789646979197, 4722.789646979197, 4722.789646979197], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399998/1000000 [7:25:20<7:45:39, 21.48it/s]global step 400000, trans_decision ep_re 4262.501164073501

{"global_step": 400000, "eval_re": [4262.5011640735, 4262.5011640735, 
4262.5011640735, 4262.5011640735, 4262.5011640735, 4262.5011640735, 
4262.5011640735, 4262.5011640735, 4262.5011640735, 4262.5011640735], "eval_len":
[1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409998/1000000 [7:36:40<7:41:29, 21.31it/s]global step 410000, trans_decision ep_re 3543.249108050576

{"global_step": 410000, "eval_re": [3543.2491080505756, 3543.2491080505756, 
3543.2491080505756, 3543.2491080505756, 3543.2491080505756, 3543.2491080505756, 
3543.2491080505756, 3543.2491080505756, 3543.2491080505756, 3543.2491080505756],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419998/1000000 [7:48:00<7:27:37, 21.60it/s]global step 420000, trans_decision ep_re 1245.1822500475043

{"global_step": 420000, "eval_re": [1245.1822500475043, 1245.1822500475043, 
1245.1822500475043, 1245.1822500475043, 1245.1822500475043, 1245.1822500475043, 
1245.1822500475043, 1245.1822500475043, 1245.1822500475043, 1245.1822500475043],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429998/1000000 [7:59:10<7:28:11, 21.20it/s]global step 430000, trans_decision ep_re 1404.3654238414435

{"global_step": 430000, "eval_re": [1404.3654238414435, 1404.3654238414435, 
1404.3654238414435, 1404.3654238414435, 1404.3654238414435, 1404.3654238414435, 
1404.3654238414435, 1404.3654238414435, 1404.3654238414435, 1404.3654238414435],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439999/1000000 [8:10:20<7:23:36, 21.04it/s]global step 440000, trans_decision ep_re 4275.605419038882

{"global_step": 440000, "eval_re": [4275.605419038882, 4275.605419038882, 
4275.605419038882, 4275.605419038882, 4275.605419038882, 4275.605419038882, 
4275.605419038882, 4275.605419038882, 4275.605419038882, 4275.605419038882], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449999/1000000 [8:21:30<7:17:46, 20.94it/s]global step 450000, trans_decision ep_re 3272.4121015740325

{"global_step": 450000, "eval_re": [3272.412101574033, 3272.412101574033, 
3272.412101574033, 3272.412101574033, 3272.412101574033, 3272.412101574033, 
3272.412101574033, 3272.412101574033, 3272.412101574033, 3272.412101574033], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459999/1000000 [8:32:50<7:05:30, 21.15it/s]global step 460000, trans_decision ep_re 1131.929153704126

{"global_step": 460000, "eval_re": [1131.929153704126, 1131.929153704126, 
1131.929153704126, 1131.929153704126, 1131.929153704126, 1131.929153704126, 
1131.929153704126, 1131.929153704126, 1131.929153704126, 1131.929153704126], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469999/1000000 [8:44:00<7:00:38, 21.00it/s]global step 470000, trans_decision ep_re 1481.8447107959407

{"global_step": 470000, "eval_re": [1481.844710795941, 1481.844710795941, 
1481.844710795941, 1481.844710795941, 1481.844710795941, 1481.844710795941, 
1481.844710795941, 1481.844710795941, 1481.844710795941, 1481.844710795941], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479998/1000000 [8:55:10<6:46:33, 21.32it/s]global step 480000, trans_decision ep_re 2476.194384802872

{"global_step": 480000, "eval_re": [2476.194384802872, 2476.194384802872, 
2476.194384802872, 2476.194384802872, 2476.194384802872, 2476.194384802872, 
2476.194384802872, 2476.194384802872, 2476.194384802872, 2476.194384802872], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489999/1000000 [9:06:20<6:41:28, 21.17it/s]global step 490000, trans_decision ep_re 1466.0843504139789

{"global_step": 490000, "eval_re": [1466.0843504139789, 1466.0843504139789, 
1466.0843504139789, 1466.0843504139789, 1466.0843504139789, 1466.0843504139789, 
1466.0843504139789, 1466.0843504139789, 1466.0843504139789, 1466.0843504139789],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499998/1000000 [9:17:40<6:36:16, 21.03it/s]global step 500000, trans_decision ep_re 1464.2542912409401

{"global_step": 500000, "eval_re": [1464.25429124094, 1464.25429124094, 
1464.25429124094, 1464.25429124094, 1464.25429124094, 1464.25429124094, 
1464.25429124094, 1464.25429124094, 1464.25429124094, 1464.25429124094], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509998/1000000 [9:29:00<6:28:44, 21.01it/s]global step 510000, trans_decision ep_re 1552.4867576968172

{"global_step": 510000, "eval_re": [1552.4867576968172, 1552.4867576968172, 
1552.4867576968172, 1552.4867576968172, 1552.4867576968172, 1552.4867576968172, 
1552.4867576968172, 1552.4867576968172, 1552.4867576968172, 1552.4867576968172],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519999/1000000 [9:40:10<6:23:39, 20.85it/s]global step 520000, trans_decision ep_re 3321.557745406554

{"global_step": 520000, "eval_re": [3321.5577454065537, 3321.5577454065537, 
3321.5577454065537, 3321.5577454065537, 3321.5577454065537, 3321.5577454065537, 
3321.5577454065537, 3321.5577454065537, 3321.5577454065537, 3321.5577454065537],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529999/1000000 [9:51:30<6:09:25, 21.20it/s]global step 530000, trans_decision ep_re 1995.1945703108606

{"global_step": 530000, "eval_re": [1995.1945703108604, 1995.1945703108604, 
1995.1945703108604, 1995.1945703108604, 1995.1945703108604, 1995.1945703108604, 
1995.1945703108604, 1995.1945703108604, 1995.1945703108604, 1995.1945703108604],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539999/1000000 [10:02:40<6:06:03, 20.94it/s]global step 540000, trans_decision ep_re 1569.2983517192224

{"global_step": 540000, "eval_re": [1569.2983517192224, 1569.2983517192224, 
1569.2983517192224, 1569.2983517192224, 1569.2983517192224, 1569.2983517192224, 
1569.2983517192224, 1569.2983517192224, 1569.2983517192224, 1569.2983517192224],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549998/1000000 [10:13:50<5:55:13, 21.11it/s]global step 550000, trans_decision ep_re 1926.4422649542844

{"global_step": 550000, "eval_re": [1926.4422649542848, 1926.4422649542848, 
1926.4422649542848, 1926.4422649542848, 1926.4422649542848, 1926.4422649542848, 
1926.4422649542848, 1926.4422649542848, 1926.4422649542848, 1926.4422649542848],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559998/1000000 [10:25:10<5:45:01, 21.25it/s]global step 560000, trans_decision ep_re 1367.2796111832872

{"global_step": 560000, "eval_re": [1367.2796111832872, 1367.2796111832872, 
1367.2796111832872, 1367.2796111832872, 1367.2796111832872, 1367.2796111832872, 
1367.2796111832872, 1367.2796111832872, 1367.2796111832872, 1367.2796111832872],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569999/1000000 [10:36:30<5:40:28, 21.05it/s]global step 570000, trans_decision ep_re 1274.9488978275483

{"global_step": 570000, "eval_re": [1274.9488978275485, 1274.9488978275485, 
1274.9488978275485, 1274.9488978275485, 1274.9488978275485, 1274.9488978275485, 
1274.9488978275485, 1274.9488978275485, 1274.9488978275485, 1274.9488978275485],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579999/1000000 [10:47:50<5:36:57, 20.77it/s]global step 580000, trans_decision ep_re 1629.4295702592226

{"global_step": 580000, "eval_re": [1629.4295702592226, 1629.4295702592226, 
1629.4295702592226, 1629.4295702592226, 1629.4295702592226, 1629.4295702592226, 
1629.4295702592226, 1629.4295702592226, 1629.4295702592226, 1629.4295702592226],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589998/1000000 [10:59:00<5:20:43, 21.31it/s]global step 590000, trans_decision ep_re 1390.7619735316916

{"global_step": 590000, "eval_re": [1390.7619735316919, 1390.7619735316919, 
1390.7619735316919, 1390.7619735316919, 1390.7619735316919, 1390.7619735316919, 
1390.7619735316919, 1390.7619735316919, 1390.7619735316919, 1390.7619735316919],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599999/1000000 [11:10:10<5:20:36, 20.79it/s]global step 600000, trans_decision ep_re 2324.3309362749324

{"global_step": 600000, "eval_re": [2324.3309362749324, 2324.3309362749324, 
2324.3309362749324, 2324.3309362749324, 2324.3309362749324, 2324.3309362749324, 
2324.3309362749324, 2324.3309362749324, 2324.3309362749324, 2324.3309362749324],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609999/1000000 [11:21:20<5:09:24, 21.01it/s]global step 610000, trans_decision ep_re 4066.1612629060887

{"global_step": 610000, "eval_re": [4066.1612629060883, 4066.1612629060883, 
4066.1612629060883, 4066.1612629060883, 4066.1612629060883, 4066.1612629060883, 
4066.1612629060883, 4066.1612629060883, 4066.1612629060883, 4066.1612629060883],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619999/1000000 [11:32:40<5:04:19, 20.81it/s]global step 620000, trans_decision ep_re 4451.811865211524

{"global_step": 620000, "eval_re": [4451.811865211524, 4451.811865211524, 
4451.811865211524, 4451.811865211524, 4451.811865211524, 4451.811865211524, 
4451.811865211524, 4451.811865211524, 4451.811865211524, 4451.811865211524], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629999/1000000 [11:44:00<4:53:40, 21.00it/s]global step 630000, trans_decision ep_re 1333.0005562257343

{"global_step": 630000, "eval_re": [1333.0005562257343, 1333.0005562257343, 
1333.0005562257343, 1333.0005562257343, 1333.0005562257343, 1333.0005562257343, 
1333.0005562257343, 1333.0005562257343, 1333.0005562257343, 1333.0005562257343],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639998/1000000 [11:55:10<4:40:48, 21.37it/s]global step 640000, trans_decision ep_re 2721.3207177096015

{"global_step": 640000, "eval_re": [2721.3207177096015, 2721.3207177096015, 
2721.3207177096015, 2721.3207177096015, 2721.3207177096015, 2721.3207177096015, 
2721.3207177096015, 2721.3207177096015, 2721.3207177096015, 2721.3207177096015],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649998/1000000 [12:06:20<4:33:18, 21.34it/s]global step 650000, trans_decision ep_re 2621.1448709277747

{"global_step": 650000, "eval_re": [2621.1448709277743, 2621.1448709277743, 
2621.1448709277743, 2621.1448709277743, 2621.1448709277743, 2621.1448709277743, 
2621.1448709277743, 2621.1448709277743, 2621.1448709277743, 2621.1448709277743],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659998/1000000 [12:17:40<4:27:40, 21.17it/s]global step 660000, trans_decision ep_re 2572.3146720110863

{"global_step": 660000, "eval_re": [2572.3146720110867, 2572.3146720110867, 
2572.3146720110867, 2572.3146720110867, 2572.3146720110867, 2572.3146720110867, 
2572.3146720110867, 2572.3146720110867, 2572.3146720110867, 2572.3146720110867],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669998/1000000 [12:29:00<4:20:09, 21.14it/s]global step 670000, trans_decision ep_re 4348.021663499778

{"global_step": 670000, "eval_re": [4348.021663499778, 4348.021663499778, 
4348.021663499778, 4348.021663499778, 4348.021663499778, 4348.021663499778, 
4348.021663499778, 4348.021663499778, 4348.021663499778, 4348.021663499778], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679998/1000000 [12:40:10<4:10:08, 21.32it/s]global step 680000, trans_decision ep_re 3078.187562346419

{"global_step": 680000, "eval_re": [3078.187562346419, 3078.187562346419, 
3078.187562346419, 3078.187562346419, 3078.187562346419, 3078.187562346419, 
3078.187562346419, 3078.187562346419, 3078.187562346419, 3078.187562346419], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689999/1000000 [12:51:20<4:07:51, 20.85it/s]global step 690000, trans_decision ep_re 1621.631024500737

{"global_step": 690000, "eval_re": [1621.631024500737, 1621.631024500737, 
1621.631024500737, 1621.631024500737, 1621.631024500737, 1621.631024500737, 
1621.631024500737, 1621.631024500737, 1621.631024500737, 1621.631024500737], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699999/1000000 [13:02:40<3:57:39, 21.04it/s]global step 700000, trans_decision ep_re 3483.268931235424

{"global_step": 700000, "eval_re": [3483.268931235424, 3483.268931235424, 
3483.268931235424, 3483.268931235424, 3483.268931235424, 3483.268931235424, 
3483.268931235424, 3483.268931235424, 3483.268931235424, 3483.268931235424], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709998/1000000 [13:13:50<3:45:24, 21.44it/s]global step 710000, trans_decision ep_re 1266.0526081297537

{"global_step": 710000, "eval_re": [1266.0526081297537, 1266.0526081297537, 
1266.0526081297537, 1266.0526081297537, 1266.0526081297537, 1266.0526081297537, 
1266.0526081297537, 1266.0526081297537, 1266.0526081297537, 1266.0526081297537],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719998/1000000 [13:25:00<3:38:58, 21.31it/s]global step 720000, trans_decision ep_re 1576.0739514292552

{"global_step": 720000, "eval_re": [1576.0739514292554, 1576.0739514292554, 
1576.0739514292554, 1576.0739514292554, 1576.0739514292554, 1576.0739514292554, 
1576.0739514292554, 1576.0739514292554, 1576.0739514292554, 1576.0739514292554],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729999/1000000 [13:36:10<3:33:25, 21.08it/s]global step 730000, trans_decision ep_re 1868.6355821668935

{"global_step": 730000, "eval_re": [1868.6355821668933, 1868.6355821668933, 
1868.6355821668933, 1868.6355821668933, 1868.6355821668933, 1868.6355821668933, 
1868.6355821668933, 1868.6355821668933, 1868.6355821668933, 1868.6355821668933],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739999/1000000 [13:47:30<3:26:06, 21.03it/s]global step 740000, trans_decision ep_re 1620.3688554835371

{"global_step": 740000, "eval_re": [1620.3688554835371, 1620.3688554835371, 
1620.3688554835371, 1620.3688554835371, 1620.3688554835371, 1620.3688554835371, 
1620.3688554835371, 1620.3688554835371, 1620.3688554835371, 1620.3688554835371],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749999/1000000 [13:58:50<3:17:49, 21.06it/s]global step 750000, trans_decision ep_re 1525.3496733555735

{"global_step": 750000, "eval_re": [1525.3496733555737, 1525.3496733555737, 
1525.3496733555737, 1525.3496733555737, 1525.3496733555737, 1525.3496733555737, 
1525.3496733555737, 1525.3496733555737, 1525.3496733555737, 1525.3496733555737],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759999/1000000 [14:10:00<3:11:28, 20.89it/s]global step 760000, trans_decision ep_re 1522.1093691624635

{"global_step": 760000, "eval_re": [1522.1093691624637, 1522.1093691624637, 
1522.1093691624637, 1522.1093691624637, 1522.1093691624637, 1522.1093691624637, 
1522.1093691624637, 1522.1093691624637, 1522.1093691624637, 1522.1093691624637],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769999/1000000 [14:21:20<3:01:47, 21.09it/s]global step 770000, trans_decision ep_re 1327.0450615680281

{"global_step": 770000, "eval_re": [1327.0450615680284, 1327.0450615680284, 
1327.0450615680284, 1327.0450615680284, 1327.0450615680284, 1327.0450615680284, 
1327.0450615680284, 1327.0450615680284, 1327.0450615680284, 1327.0450615680284],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779998/1000000 [14:32:30<2:51:53, 21.33it/s]global step 780000, trans_decision ep_re 1829.167069922983

{"global_step": 780000, "eval_re": [1829.1670699229833, 1829.1670699229833, 
1829.1670699229833, 1829.1670699229833, 1829.1670699229833, 1829.1670699229833, 
1829.1670699229833, 1829.1670699229833, 1829.1670699229833, 1829.1670699229833],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789999/1000000 [14:43:50<2:45:00, 21.21it/s]global step 790000, trans_decision ep_re 2044.4464530370271

{"global_step": 790000, "eval_re": [2044.4464530370271, 2044.4464530370271, 
2044.4464530370271, 2044.4464530370271, 2044.4464530370271, 2044.4464530370271, 
2044.4464530370271, 2044.4464530370271, 2044.4464530370271, 2044.4464530370271],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799998/1000000 [14:55:01<2:37:15, 21.20it/s]global step 800000, trans_decision ep_re 1609.1281438255487

{"global_step": 800000, "eval_re": [1609.1281438255487, 1609.1281438255487, 
1609.1281438255487, 1609.1281438255487, 1609.1281438255487, 1609.1281438255487, 
1609.1281438255487, 1609.1281438255487, 1609.1281438255487, 1609.1281438255487],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809998/1000000 [15:06:11<2:29:51, 21.13it/s]global step 810000, trans_decision ep_re 2993.185857960724

{"global_step": 810000, "eval_re": [2993.1858579607238, 2993.1858579607238, 
2993.1858579607238, 2993.1858579607238, 2993.1858579607238, 2993.1858579607238, 
2993.1858579607238, 2993.1858579607238, 2993.1858579607238, 2993.1858579607238],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819999/1000000 [15:17:21<2:22:46, 21.01it/s]global step 820000, trans_decision ep_re 1930.4632005388262

{"global_step": 820000, "eval_re": [1930.4632005388262, 1930.4632005388262, 
1930.4632005388262, 1930.4632005388262, 1930.4632005388262, 1930.4632005388262, 
1930.4632005388262, 1930.4632005388262, 1930.4632005388262, 1930.4632005388262],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829999/1000000 [15:28:41<2:15:27, 20.92it/s]global step 830000, trans_decision ep_re 2799.2002726507717

{"global_step": 830000, "eval_re": [2799.200272650772, 2799.200272650772, 
2799.200272650772, 2799.200272650772, 2799.200272650772, 2799.200272650772, 
2799.200272650772, 2799.200272650772, 2799.200272650772, 2799.200272650772], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839999/1000000 [15:40:01<2:06:22, 21.10it/s]global step 840000, trans_decision ep_re 2139.5879048546312

{"global_step": 840000, "eval_re": [2139.5879048546312, 2139.5879048546312, 
2139.5879048546312, 2139.5879048546312, 2139.5879048546312, 2139.5879048546312, 
2139.5879048546312, 2139.5879048546312, 2139.5879048546312, 2139.5879048546312],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849998/1000000 [15:51:11<1:57:21, 21.30it/s]global step 850000, trans_decision ep_re 1984.320391379319

{"global_step": 850000, "eval_re": [1984.3203913793188, 1984.3203913793188, 
1984.3203913793188, 1984.3203913793188, 1984.3203913793188, 1984.3203913793188, 
1984.3203913793188, 1984.3203913793188, 1984.3203913793188, 1984.3203913793188],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859999/1000000 [16:02:31<1:52:54, 20.67it/s]global step 860000, trans_decision ep_re 2148.3165929110874

{"global_step": 860000, "eval_re": [2148.3165929110874, 2148.3165929110874, 
2148.3165929110874, 2148.3165929110874, 2148.3165929110874, 2148.3165929110874, 
2148.3165929110874, 2148.3165929110874, 2148.3165929110874, 2148.3165929110874],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869998/1000000 [16:13:41<1:43:08, 21.01it/s]global step 870000, trans_decision ep_re 1683.8810493277572

{"global_step": 870000, "eval_re": [1683.8810493277572, 1683.8810493277572, 
1683.8810493277572, 1683.8810493277572, 1683.8810493277572, 1683.8810493277572, 
1683.8810493277572, 1683.8810493277572, 1683.8810493277572, 1683.8810493277572],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879999/1000000 [16:24:51<1:37:02, 20.61it/s]global step 880000, trans_decision ep_re 1367.7316130534218

{"global_step": 880000, "eval_re": [1367.7316130534218, 1367.7316130534218, 
1367.7316130534218, 1367.7316130534218, 1367.7316130534218, 1367.7316130534218, 
1367.7316130534218, 1367.7316130534218, 1367.7316130534218, 1367.7316130534218],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889998/1000000 [16:36:11<1:26:21, 21.23it/s]global step 890000, trans_decision ep_re 1344.9539400101216

{"global_step": 890000, "eval_re": [1344.9539400101214, 1344.9539400101214, 
1344.9539400101214, 1344.9539400101214, 1344.9539400101214, 1344.9539400101214, 
1344.9539400101214, 1344.9539400101214, 1344.9539400101214, 1344.9539400101214],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [16:47:21<1:19:26, 20.98it/s]global step 900000, trans_decision ep_re 1943.332950857465

{"global_step": 900000, "eval_re": [1943.3329508574652, 1943.3329508574652, 
1943.3329508574652, 1943.3329508574652, 1943.3329508574652, 1943.3329508574652, 
1943.3329508574652, 1943.3329508574652, 1943.3329508574652, 1943.3329508574652],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909998/1000000 [16:58:41<1:10:48, 21.19it/s]global step 910000, trans_decision ep_re 1542.366770406274

{"global_step": 910000, "eval_re": [1542.366770406274, 1542.366770406274, 
1542.366770406274, 1542.366770406274, 1542.366770406274, 1542.366770406274, 
1542.366770406274, 1542.366770406274, 1542.366770406274, 1542.366770406274], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919999/1000000 [17:09:51<1:03:41, 20.93it/s]global step 920000, trans_decision ep_re 1563.5154371906506

{"global_step": 920000, "eval_re": [1563.5154371906508, 1563.5154371906508, 
1563.5154371906508, 1563.5154371906508, 1563.5154371906508, 1563.5154371906508, 
1563.5154371906508, 1563.5154371906508, 1563.5154371906508, 1563.5154371906508],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929999/1000000 [17:21:11<55:53, 20.88it/s]global step 930000, trans_decision ep_re 2074.208372673566

{"global_step": 930000, "eval_re": [2074.208372673566, 2074.208372673566, 
2074.208372673566, 2074.208372673566, 2074.208372673566, 2074.208372673566, 
2074.208372673566, 2074.208372673566, 2074.208372673566, 2074.208372673566], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939999/1000000 [17:32:21<47:30, 21.05it/s]global step 940000, trans_decision ep_re 1366.255506702702

{"global_step": 940000, "eval_re": [1366.255506702702, 1366.255506702702, 
1366.255506702702, 1366.255506702702, 1366.255506702702, 1366.255506702702, 
1366.255506702702, 1366.255506702702, 1366.255506702702, 1366.255506702702], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949998/1000000 [17:43:31<39:36, 21.04it/s]global step 950000, trans_decision ep_re 1471.329283546443

{"global_step": 950000, "eval_re": [1471.329283546443, 1471.329283546443, 
1471.329283546443, 1471.329283546443, 1471.329283546443, 1471.329283546443, 
1471.329283546443, 1471.329283546443, 1471.329283546443, 1471.329283546443], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959999/1000000 [17:54:51<31:44, 21.01it/s]global step 960000, trans_decision ep_re 1412.0831876925517

{"global_step": 960000, "eval_re": [1412.083187692552, 1412.083187692552, 
1412.083187692552, 1412.083187692552, 1412.083187692552, 1412.083187692552, 
1412.083187692552, 1412.083187692552, 1412.083187692552, 1412.083187692552], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969999/1000000 [18:06:11<23:48, 21.00it/s]global step 970000, trans_decision ep_re 1895.6916409590947

{"global_step": 970000, "eval_re": [1895.6916409590947, 1895.6916409590947, 
1895.6916409590947, 1895.6916409590947, 1895.6916409590947, 1895.6916409590947, 
1895.6916409590947, 1895.6916409590947, 1895.6916409590947, 1895.6916409590947],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979999/1000000 [18:17:21<16:02, 20.79it/s]global step 980000, trans_decision ep_re 2827.606725305188

{"global_step": 980000, "eval_re": [2827.606725305188, 2827.606725305188, 
2827.606725305188, 2827.606725305188, 2827.606725305188, 2827.606725305188, 
2827.606725305188, 2827.606725305188, 2827.606725305188, 2827.606725305188], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989997/1000000 [18:28:41<07:57, 20.96it/s]global step 990000, trans_decision ep_re 92.07680001479812

{"global_step": 990000, "eval_re": [92.07680001479814, 92.07680001479814, 
92.07680001479814, 92.07680001479814, 92.07680001479814, 92.07680001479814, 
92.07680001479814, 92.07680001479814, 92.07680001479814, 92.07680001479814], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999998/1000000 [18:40:01<00:00, 20.51it/s]global step 1000000, trans_decision ep_re 1266.7655247571786

{"global_step": 1000000, "eval_re": [1266.7655247571786, 1266.7655247571786, 
1266.7655247571786, 1266.7655247571786, 1266.7655247571786, 1266.7655247571786, 
1266.7655247571786, 1266.7655247571786, 1266.7655247571786, 1266.7655247571786],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [18:40:38<00:00, 14.87it/s]
