
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.0
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [05:02<12:11:42, 22.55it/s]global step 10000, trans_decision ep_re 251.31158665121498

{"global_step": 10000, "eval_re": [251.311586651215, 251.311586651215, 
251.311586651215, 251.311586651215, 251.311586651215, 251.311586651215, 
251.311586651215, 251.311586651215, 251.311586651215, 251.311586651215], 
"eval_len": [51, 51, 51, 51, 51, 51, 51, 51, 51, 51]}

  2%|▏         | 19998/1000000 [15:20<11:57:46, 22.76it/s]global step 20000, trans_decision ep_re 272.029062241258

{"global_step": 20000, "eval_re": [272.0290622412579, 272.0290622412579, 
272.0290622412579, 272.0290622412579, 272.0290622412579, 272.0290622412579, 
272.0290622412579, 272.0290622412579, 272.0290622412579, 272.0290622412579], 
"eval_len": [53, 53, 53, 53, 53, 53, 53, 53, 53, 53]}

  3%|▎         | 29999/1000000 [25:20<11:55:53, 22.58it/s]global step 30000, trans_decision ep_re 343.2133065153986

{"global_step": 30000, "eval_re": [343.21330651539864, 343.21330651539864, 
343.21330651539864, 343.21330651539864, 343.21330651539864, 343.21330651539864, 
343.21330651539864, 343.21330651539864, 343.21330651539864, 343.21330651539864],
"eval_len": [64, 64, 64, 64, 64, 64, 64, 64, 64, 64]}

  4%|▍         | 39999/1000000 [35:20<11:52:11, 22.47it/s]global step 40000, trans_decision ep_re 350.62549275898743

{"global_step": 40000, "eval_re": [350.6254927589874, 350.6254927589874, 
350.6254927589874, 350.6254927589874, 350.6254927589874, 350.6254927589874, 
350.6254927589874, 350.6254927589874, 350.6254927589874, 350.6254927589874], 
"eval_len": [66, 66, 66, 66, 66, 66, 66, 66, 66, 66]}

  5%|▍         | 49998/1000000 [45:30<11:41:31, 22.57it/s]global step 50000, trans_decision ep_re 269.1918312827003

{"global_step": 50000, "eval_re": [269.1918312827004, 269.1918312827004, 
269.1918312827004, 269.1918312827004, 269.1918312827004, 269.1918312827004, 
269.1918312827004, 269.1918312827004, 269.1918312827004, 269.1918312827004], 
"eval_len": [53, 53, 53, 53, 53, 53, 53, 53, 53, 53]}

  6%|▌         | 59999/1000000 [55:20<11:33:28, 22.59it/s]global step 60000, trans_decision ep_re 335.65369854441343

{"global_step": 60000, "eval_re": [335.6536985444134, 335.6536985444134, 
335.6536985444134, 335.6536985444134, 335.6536985444134, 335.6536985444134, 
335.6536985444134, 335.6536985444134, 335.6536985444134, 335.6536985444134], 
"eval_len": [63, 63, 63, 63, 63, 63, 63, 63, 63, 63]}

  7%|▋         | 69999/1000000 [1:05:40<11:32:51, 22.37it/s]global step 70000, trans_decision ep_re 400.11731240147384

{"global_step": 70000, "eval_re": [400.1173124014738, 400.1173124014738, 
400.1173124014738, 400.1173124014738, 400.1173124014738, 400.1173124014738, 
400.1173124014738, 400.1173124014738, 400.1173124014738, 400.1173124014738], 
"eval_len": [74, 74, 74, 74, 74, 74, 74, 74, 74, 74]}

  8%|▊         | 79999/1000000 [1:15:40<11:22:54, 22.45it/s]global step 80000, trans_decision ep_re 398.5094720350772

{"global_step": 80000, "eval_re": [398.5094720350772, 398.5094720350772, 
398.5094720350772, 398.5094720350772, 398.5094720350772, 398.5094720350772, 
398.5094720350772, 398.5094720350772, 398.5094720350772, 398.5094720350772], 
"eval_len": [74, 74, 74, 74, 74, 74, 74, 74, 74, 74]}

  9%|▉         | 89998/1000000 [1:25:50<11:04:21, 22.83it/s]global step 90000, trans_decision ep_re 365.9482320155606

{"global_step": 90000, "eval_re": [365.9482320155606, 365.9482320155606, 
365.9482320155606, 365.9482320155606, 365.9482320155606, 365.9482320155606, 
365.9482320155606, 365.9482320155606, 365.9482320155606, 365.9482320155606], 
"eval_len": [69, 69, 69, 69, 69, 69, 69, 69, 69, 69]}

 10%|▉         | 99999/1000000 [1:35:41<11:09:11, 22.42it/s]global step 100000, trans_decision ep_re 238.8711496332381

{"global_step": 100000, "eval_re": [238.8711496332381, 238.8711496332381, 
238.8711496332381, 238.8711496332381, 238.8711496332381, 238.8711496332381, 
238.8711496332381, 238.8711496332381, 238.8711496332381, 238.8711496332381], 
"eval_len": [48, 48, 48, 48, 48, 48, 48, 48, 48, 48]}

 11%|█         | 109999/1000000 [1:46:00<10:54:57, 22.65it/s]global step 110000, trans_decision ep_re 285.7603189526459

{"global_step": 110000, "eval_re": [285.7603189526459, 285.7603189526459, 
285.7603189526459, 285.7603189526459, 285.7603189526459, 285.7603189526459, 
285.7603189526459, 285.7603189526459, 285.7603189526459, 285.7603189526459], 
"eval_len": [56, 56, 56, 56, 56, 56, 56, 56, 56, 56]}

 12%|█▏        | 119999/1000000 [1:56:00<10:53:37, 22.44it/s]global step 120000, trans_decision ep_re 314.9673843467899

{"global_step": 120000, "eval_re": [314.9673843467899, 314.9673843467899, 
314.9673843467899, 314.9673843467899, 314.9673843467899, 314.9673843467899, 
314.9673843467899, 314.9673843467899, 314.9673843467899, 314.9673843467899], 
"eval_len": [61, 61, 61, 61, 61, 61, 61, 61, 61, 61]}

 13%|█▎        | 129998/1000000 [2:05:51<10:33:01, 22.91it/s]global step 130000, trans_decision ep_re 350.689480643813

{"global_step": 130000, "eval_re": [350.68948064381294, 350.68948064381294, 
350.68948064381294, 350.68948064381294, 350.68948064381294, 350.68948064381294, 
350.68948064381294, 350.68948064381294, 350.68948064381294, 350.68948064381294],
"eval_len": [65, 65, 65, 65, 65, 65, 65, 65, 65, 65]}

 14%|█▍        | 139999/1000000 [2:16:10<10:44:35, 22.24it/s]global step 140000, trans_decision ep_re 316.43851781612113

{"global_step": 140000, "eval_re": [316.43851781612113, 316.43851781612113, 
316.43851781612113, 316.43851781612113, 316.43851781612113, 316.43851781612113, 
316.43851781612113, 316.43851781612113, 316.43851781612113, 316.43851781612113],
"eval_len": [60, 60, 60, 60, 60, 60, 60, 60, 60, 60]}

 15%|█▍        | 149999/1000000 [2:26:00<10:27:06, 22.59it/s]global step 150000, trans_decision ep_re 228.20316332320945

{"global_step": 150000, "eval_re": [228.20316332320945, 228.20316332320945, 
228.20316332320945, 228.20316332320945, 228.20316332320945, 228.20316332320945, 
228.20316332320945, 228.20316332320945, 228.20316332320945, 228.20316332320945],
"eval_len": [46, 46, 46, 46, 46, 46, 46, 46, 46, 46]}

 16%|█▌        | 159999/1000000 [2:36:20<10:23:56, 22.44it/s]global step 160000, trans_decision ep_re 311.4915176850491

{"global_step": 160000, "eval_re": [311.4915176850491, 311.4915176850491, 
311.4915176850491, 311.4915176850491, 311.4915176850491, 311.4915176850491, 
311.4915176850491, 311.4915176850491, 311.4915176850491, 311.4915176850491], 
"eval_len": [59, 59, 59, 59, 59, 59, 59, 59, 59, 59]}

 17%|█▋        | 169999/1000000 [2:46:20<10:14:59, 22.49it/s]global step 170000, trans_decision ep_re 313.247874112477

{"global_step": 170000, "eval_re": [313.247874112477, 313.247874112477, 
313.247874112477, 313.247874112477, 313.247874112477, 313.247874112477, 
313.247874112477, 313.247874112477, 313.247874112477, 313.247874112477], 
"eval_len": [59, 59, 59, 59, 59, 59, 59, 59, 59, 59]}

 18%|█▊        | 179998/1000000 [2:56:30<9:59:39, 22.79it/s]global step 180000, trans_decision ep_re 379.1525677968631

{"global_step": 180000, "eval_re": [379.15256779686314, 379.15256779686314, 
379.15256779686314, 379.15256779686314, 379.15256779686314, 379.15256779686314, 
379.15256779686314, 379.15256779686314, 379.15256779686314, 379.15256779686314],
"eval_len": [70, 70, 70, 70, 70, 70, 70, 70, 70, 70]}

 19%|█▉        | 189999/1000000 [3:06:30<10:03:07, 22.38it/s]global step 190000, trans_decision ep_re 361.6206654010234

{"global_step": 190000, "eval_re": [361.6206654010234, 361.6206654010234, 
361.6206654010234, 361.6206654010234, 361.6206654010234, 361.6206654010234, 
361.6206654010234, 361.6206654010234, 361.6206654010234, 361.6206654010234], 
"eval_len": [67, 67, 67, 67, 67, 67, 67, 67, 67, 67]}

 20%|█▉        | 199998/1000000 [3:16:21<9:44:24, 22.82it/s]global step 200000, trans_decision ep_re 360.7916327281973

{"global_step": 200000, "eval_re": [360.7916327281973, 360.7916327281973, 
360.7916327281973, 360.7916327281973, 360.7916327281973, 360.7916327281973, 
360.7916327281973, 360.7916327281973, 360.7916327281973, 360.7916327281973], 
"eval_len": [67, 67, 67, 67, 67, 67, 67, 67, 67, 67]}

 21%|██        | 209999/1000000 [3:26:40<9:47:27, 22.41it/s]global step 210000, trans_decision ep_re 256.0994794788825

{"global_step": 210000, "eval_re": [256.0994794788824, 256.0994794788824, 
256.0994794788824, 256.0994794788824, 256.0994794788824, 256.0994794788824, 
256.0994794788824, 256.0994794788824, 256.0994794788824, 256.0994794788824], 
"eval_len": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}

 22%|██▏       | 219999/1000000 [3:36:30<9:41:21, 22.36it/s]global step 220000, trans_decision ep_re 234.14963723926354

{"global_step": 220000, "eval_re": [234.1496372392635, 234.1496372392635, 
234.1496372392635, 234.1496372392635, 234.1496372392635, 234.1496372392635, 
234.1496372392635, 234.1496372392635, 234.1496372392635, 234.1496372392635], 
"eval_len": [47, 47, 47, 47, 47, 47, 47, 47, 47, 47]}

 23%|██▎       | 229998/1000000 [3:46:50<9:29:29, 22.53it/s]global step 230000, trans_decision ep_re 319.1576380623518

{"global_step": 230000, "eval_re": [319.1576380623518, 319.1576380623518, 
319.1576380623518, 319.1576380623518, 319.1576380623518, 319.1576380623518, 
319.1576380623518, 319.1576380623518, 319.1576380623518, 319.1576380623518], 
"eval_len": [60, 60, 60, 60, 60, 60, 60, 60, 60, 60]}

 24%|██▍       | 239999/1000000 [3:56:50<9:26:13, 22.37it/s]global step 240000, trans_decision ep_re 351.17797915632207

{"global_step": 240000, "eval_re": [351.177979156322, 351.177979156322, 
351.177979156322, 351.177979156322, 351.177979156322, 351.177979156322, 
351.177979156322, 351.177979156322, 351.177979156322, 351.177979156322], 
"eval_len": [65, 65, 65, 65, 65, 65, 65, 65, 65, 65]}

 25%|██▍       | 249998/1000000 [4:06:40<9:02:57, 23.02it/s]global step 250000, trans_decision ep_re 247.01724223648338

{"global_step": 250000, "eval_re": [247.01724223648338, 247.01724223648338, 
247.01724223648338, 247.01724223648338, 247.01724223648338, 247.01724223648338, 
247.01724223648338, 247.01724223648338, 247.01724223648338, 247.01724223648338],
"eval_len": [49, 49, 49, 49, 49, 49, 49, 49, 49, 49]}

 26%|██▌       | 259999/1000000 [4:16:50<9:09:47, 22.43it/s]global step 260000, trans_decision ep_re 234.4956757632743

{"global_step": 260000, "eval_re": [234.4956757632743, 234.4956757632743, 
234.4956757632743, 234.4956757632743, 234.4956757632743, 234.4956757632743, 
234.4956757632743, 234.4956757632743, 234.4956757632743, 234.4956757632743], 
"eval_len": [47, 47, 47, 47, 47, 47, 47, 47, 47, 47]}

 27%|██▋       | 269999/1000000 [4:26:50<8:57:22, 22.64it/s]global step 270000, trans_decision ep_re 247.45762265053017

{"global_step": 270000, "eval_re": [247.4576226505302, 247.4576226505302, 
247.4576226505302, 247.4576226505302, 247.4576226505302, 247.4576226505302, 
247.4576226505302, 247.4576226505302, 247.4576226505302, 247.4576226505302], 
"eval_len": [49, 49, 49, 49, 49, 49, 49, 49, 49, 49]}

 28%|██▊       | 279999/1000000 [4:36:50<8:51:17, 22.59it/s]global step 280000, trans_decision ep_re 273.41116236403826

{"global_step": 280000, "eval_re": [273.41116236403826, 273.41116236403826, 
273.41116236403826, 273.41116236403826, 273.41116236403826, 273.41116236403826, 
273.41116236403826, 273.41116236403826, 273.41116236403826, 273.41116236403826],
"eval_len": [53, 53, 53, 53, 53, 53, 53, 53, 53, 53]}

 29%|██▉       | 289999/1000000 [4:46:50<8:43:41, 22.60it/s]global step 290000, trans_decision ep_re 267.5284894253699

{"global_step": 290000, "eval_re": [267.5284894253699, 267.5284894253699, 
267.5284894253699, 267.5284894253699, 267.5284894253699, 267.5284894253699, 
267.5284894253699, 267.5284894253699, 267.5284894253699, 267.5284894253699], 
"eval_len": [52, 52, 52, 52, 52, 52, 52, 52, 52, 52]}

 30%|██▉       | 299999/1000000 [4:56:50<8:35:28, 22.63it/s]global step 300000, trans_decision ep_re 354.36042388011776

{"global_step": 300000, "eval_re": [354.36042388011776, 354.36042388011776, 
354.36042388011776, 354.36042388011776, 354.36042388011776, 354.36042388011776, 
354.36042388011776, 354.36042388011776, 354.36042388011776, 354.36042388011776],
"eval_len": [66, 66, 66, 66, 66, 66, 66, 66, 66, 66]}

 31%|███       | 309999/1000000 [5:06:50<8:26:18, 22.71it/s]global step 310000, trans_decision ep_re 300.1882334300844

{"global_step": 310000, "eval_re": [300.1882334300844, 300.1882334300844, 
300.1882334300844, 300.1882334300844, 300.1882334300844, 300.1882334300844, 
300.1882334300844, 300.1882334300844, 300.1882334300844, 300.1882334300844], 
"eval_len": [57, 57, 57, 57, 57, 57, 57, 57, 57, 57]}

 32%|███▏      | 319999/1000000 [5:16:50<8:22:43, 22.54it/s]global step 320000, trans_decision ep_re 327.1132075012489

{"global_step": 320000, "eval_re": [327.1132075012489, 327.1132075012489, 
327.1132075012489, 327.1132075012489, 327.1132075012489, 327.1132075012489, 
327.1132075012489, 327.1132075012489, 327.1132075012489, 327.1132075012489], 
"eval_len": [62, 62, 62, 62, 62, 62, 62, 62, 62, 62]}

 33%|███▎      | 329999/1000000 [5:26:50<8:14:26, 22.58it/s]global step 330000, trans_decision ep_re 304.14339392562397

{"global_step": 330000, "eval_re": [304.14339392562397, 304.14339392562397, 
304.14339392562397, 304.14339392562397, 304.14339392562397, 304.14339392562397, 
304.14339392562397, 304.14339392562397, 304.14339392562397, 304.14339392562397],
"eval_len": [58, 58, 58, 58, 58, 58, 58, 58, 58, 58]}

 34%|███▍      | 339999/1000000 [5:36:50<8:08:14, 22.53it/s]global step 340000, trans_decision ep_re 336.2566026335099

{"global_step": 340000, "eval_re": [336.2566026335099, 336.2566026335099, 
336.2566026335099, 336.2566026335099, 336.2566026335099, 336.2566026335099, 
336.2566026335099, 336.2566026335099, 336.2566026335099, 336.2566026335099], 
"eval_len": [64, 64, 64, 64, 64, 64, 64, 64, 64, 64]}

 35%|███▍      | 349999/1000000 [5:46:50<7:57:51, 22.67it/s]global step 350000, trans_decision ep_re 293.53696573151404

{"global_step": 350000, "eval_re": [293.53696573151404, 293.53696573151404, 
293.53696573151404, 293.53696573151404, 293.53696573151404, 293.53696573151404, 
293.53696573151404, 293.53696573151404, 293.53696573151404, 293.53696573151404],
"eval_len": [56, 56, 56, 56, 56, 56, 56, 56, 56, 56]}

 36%|███▌      | 359999/1000000 [5:56:50<7:53:44, 22.52it/s]global step 360000, trans_decision ep_re 324.0815324404333

{"global_step": 360000, "eval_re": [324.0815324404333, 324.0815324404333, 
324.0815324404333, 324.0815324404333, 324.0815324404333, 324.0815324404333, 
324.0815324404333, 324.0815324404333, 324.0815324404333, 324.0815324404333], 
"eval_len": [61, 61, 61, 61, 61, 61, 61, 61, 61, 61]}

 37%|███▋      | 369999/1000000 [6:06:50<7:46:05, 22.53it/s]global step 370000, trans_decision ep_re 260.69900368855804

{"global_step": 370000, "eval_re": [260.69900368855804, 260.69900368855804, 
260.69900368855804, 260.69900368855804, 260.69900368855804, 260.69900368855804, 
260.69900368855804, 260.69900368855804, 260.69900368855804, 260.69900368855804],
"eval_len": [51, 51, 51, 51, 51, 51, 51, 51, 51, 51]}

 38%|███▊      | 379999/1000000 [6:16:50<7:35:27, 22.69it/s]global step 380000, trans_decision ep_re 240.49898634806215

{"global_step": 380000, "eval_re": [240.49898634806212, 240.49898634806212, 
240.49898634806212, 240.49898634806212, 240.49898634806212, 240.49898634806212, 
240.49898634806212, 240.49898634806212, 240.49898634806212, 240.49898634806212],
"eval_len": [48, 48, 48, 48, 48, 48, 48, 48, 48, 48]}

 39%|███▉      | 389999/1000000 [6:26:50<7:30:45, 22.55it/s]global step 390000, trans_decision ep_re 279.7889896026995

{"global_step": 390000, "eval_re": [279.7889896026995, 279.7889896026995, 
279.7889896026995, 279.7889896026995, 279.7889896026995, 279.7889896026995, 
279.7889896026995, 279.7889896026995, 279.7889896026995, 279.7889896026995], 
"eval_len": [54, 54, 54, 54, 54, 54, 54, 54, 54, 54]}

 40%|███▉      | 399999/1000000 [6:36:50<7:22:20, 22.61it/s]global step 400000, trans_decision ep_re 405.4829898470265

{"global_step": 400000, "eval_re": [405.48298984702643, 405.48298984702643, 
405.48298984702643, 405.48298984702643, 405.48298984702643, 405.48298984702643, 
405.48298984702643, 405.48298984702643, 405.48298984702643, 405.48298984702643],
"eval_len": [74, 74, 74, 74, 74, 74, 74, 74, 74, 74]}

 41%|████      | 409999/1000000 [6:46:50<7:18:24, 22.43it/s]global step 410000, trans_decision ep_re 254.3056496056487

{"global_step": 410000, "eval_re": [254.30564960564868, 254.30564960564868, 
254.30564960564868, 254.30564960564868, 254.30564960564868, 254.30564960564868, 
254.30564960564868, 254.30564960564868, 254.30564960564868, 254.30564960564868],
"eval_len": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}

 42%|████▏     | 419999/1000000 [6:56:50<7:06:31, 22.66it/s]global step 420000, trans_decision ep_re 242.11606059588422

{"global_step": 420000, "eval_re": [242.1160605958842, 242.1160605958842, 
242.1160605958842, 242.1160605958842, 242.1160605958842, 242.1160605958842, 
242.1160605958842, 242.1160605958842, 242.1160605958842, 242.1160605958842], 
"eval_len": [48, 48, 48, 48, 48, 48, 48, 48, 48, 48]}

 43%|████▎     | 429999/1000000 [7:06:50<7:02:21, 22.49it/s]global step 430000, trans_decision ep_re 254.42652401529887

{"global_step": 430000, "eval_re": [254.42652401529884, 254.42652401529884, 
254.42652401529884, 254.42652401529884, 254.42652401529884, 254.42652401529884, 
254.42652401529884, 254.42652401529884, 254.42652401529884, 254.42652401529884],
"eval_len": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}

 44%|████▍     | 439999/1000000 [7:16:50<6:52:41, 22.62it/s]global step 440000, trans_decision ep_re 359.45822817393133

{"global_step": 440000, "eval_re": [359.45822817393133, 359.45822817393133, 
359.45822817393133, 359.45822817393133, 359.45822817393133, 359.45822817393133, 
359.45822817393133, 359.45822817393133, 359.45822817393133, 359.45822817393133],
"eval_len": [67, 67, 67, 67, 67, 67, 67, 67, 67, 67]}

 45%|████▍     | 449998/1000000 [7:26:50<6:38:46, 22.99it/s]global step 450000, trans_decision ep_re 411.52300242894

{"global_step": 450000, "eval_re": [411.52300242893995, 411.52300242893995, 
411.52300242893995, 411.52300242893995, 411.52300242893995, 411.52300242893995, 
411.52300242893995, 411.52300242893995, 411.52300242893995, 411.52300242893995],
"eval_len": [76, 76, 76, 76, 76, 76, 76, 76, 76, 76]}

 46%|████▌     | 459998/1000000 [7:36:40<6:33:43, 22.86it/s]global step 460000, trans_decision ep_re 347.50774597259493

{"global_step": 460000, "eval_re": [347.50774597259493, 347.50774597259493, 
347.50774597259493, 347.50774597259493, 347.50774597259493, 347.50774597259493, 
347.50774597259493, 347.50774597259493, 347.50774597259493, 347.50774597259493],
"eval_len": [66, 66, 66, 66, 66, 66, 66, 66, 66, 66]}

 47%|████▋     | 469999/1000000 [7:46:40<6:30:52, 22.60it/s]global step 470000, trans_decision ep_re 345.6083969586713

{"global_step": 470000, "eval_re": [345.6083969586713, 345.6083969586713, 
345.6083969586713, 345.6083969586713, 345.6083969586713, 345.6083969586713, 
345.6083969586713, 345.6083969586713, 345.6083969586713, 345.6083969586713], 
"eval_len": [65, 65, 65, 65, 65, 65, 65, 65, 65, 65]}

 48%|████▊     | 479998/1000000 [7:56:41<6:17:36, 22.95it/s]global step 480000, trans_decision ep_re 455.4091189824001

{"global_step": 480000, "eval_re": [455.40911898240006, 455.40911898240006, 
455.40911898240006, 455.40911898240006, 455.40911898240006, 455.40911898240006, 
455.40911898240006, 455.40911898240006, 455.40911898240006, 455.40911898240006],
"eval_len": [86, 86, 86, 86, 86, 86, 86, 86, 86, 86]}

 49%|████▉     | 489999/1000000 [8:06:41<6:16:56, 22.55it/s]global step 490000, trans_decision ep_re 316.47150291884964

{"global_step": 490000, "eval_re": [316.47150291884964, 316.47150291884964, 
316.47150291884964, 316.47150291884964, 316.47150291884964, 316.47150291884964, 
316.47150291884964, 316.47150291884964, 316.47150291884964, 316.47150291884964],
"eval_len": [60, 60, 60, 60, 60, 60, 60, 60, 60, 60]}

 50%|████▉     | 499997/1000000 [8:16:41<6:05:25, 22.80it/s]global step 500000, trans_decision ep_re 292.0038296433887

{"global_step": 500000, "eval_re": [292.00382964338877, 292.00382964338877, 
292.00382964338877, 292.00382964338877, 292.00382964338877, 292.00382964338877, 
292.00382964338877, 292.00382964338877, 292.00382964338877, 292.00382964338877],
"eval_len": [56, 56, 56, 56, 56, 56, 56, 56, 56, 56]}

 51%|█████     | 509998/1000000 [8:27:00<5:55:49, 22.95it/s]global step 510000, trans_decision ep_re 421.8616162927574

{"global_step": 510000, "eval_re": [421.8616162927574, 421.8616162927574, 
421.8616162927574, 421.8616162927574, 421.8616162927574, 421.8616162927574, 
421.8616162927574, 421.8616162927574, 421.8616162927574, 421.8616162927574], 
"eval_len": [79, 79, 79, 79, 79, 79, 79, 79, 79, 79]}

 52%|█████▏    | 519998/1000000 [8:37:00<5:53:38, 22.62it/s]global step 520000, trans_decision ep_re 383.20482292953227

{"global_step": 520000, "eval_re": [383.20482292953227, 383.20482292953227, 
383.20482292953227, 383.20482292953227, 383.20482292953227, 383.20482292953227, 
383.20482292953227, 383.20482292953227, 383.20482292953227, 383.20482292953227],
"eval_len": [73, 73, 73, 73, 73, 73, 73, 73, 73, 73]}

 53%|█████▎    | 529997/1000000 [8:47:00<5:48:08, 22.50it/s]global step 530000, trans_decision ep_re 345.9180901526521

{"global_step": 530000, "eval_re": [345.9180901526521, 345.9180901526521, 
345.9180901526521, 345.9180901526521, 345.9180901526521, 345.9180901526521, 
345.9180901526521, 345.9180901526521, 345.9180901526521, 345.9180901526521], 
"eval_len": [65, 65, 65, 65, 65, 65, 65, 65, 65, 65]}

 54%|█████▍    | 539999/1000000 [8:57:00<5:37:15, 22.73it/s]global step 540000, trans_decision ep_re 377.14116113269927

{"global_step": 540000, "eval_re": [377.14116113269927, 377.14116113269927, 
377.14116113269927, 377.14116113269927, 377.14116113269927, 377.14116113269927, 
377.14116113269927, 377.14116113269927, 377.14116113269927, 377.14116113269927],
"eval_len": [71, 71, 71, 71, 71, 71, 71, 71, 71, 71]}

 55%|█████▍    | 549999/1000000 [9:07:00<5:30:34, 22.69it/s]global step 550000, trans_decision ep_re 330.2791553219863

{"global_step": 550000, "eval_re": [330.2791553219863, 330.2791553219863, 
330.2791553219863, 330.2791553219863, 330.2791553219863, 330.2791553219863, 
330.2791553219863, 330.2791553219863, 330.2791553219863, 330.2791553219863], 
"eval_len": [64, 64, 64, 64, 64, 64, 64, 64, 64, 64]}

 56%|█████▌    | 559999/1000000 [9:17:00<5:25:53, 22.50it/s]global step 560000, trans_decision ep_re 317.89799042490614

{"global_step": 560000, "eval_re": [317.8979904249061, 317.8979904249061, 
317.8979904249061, 317.8979904249061, 317.8979904249061, 317.8979904249061, 
317.8979904249061, 317.8979904249061, 317.8979904249061, 317.8979904249061], 
"eval_len": [61, 61, 61, 61, 61, 61, 61, 61, 61, 61]}

 57%|█████▋    | 569997/1000000 [9:27:00<5:16:29, 22.64it/s]global step 570000, trans_decision ep_re 308.0223912117636

{"global_step": 570000, "eval_re": [308.0223912117636, 308.0223912117636, 
308.0223912117636, 308.0223912117636, 308.0223912117636, 308.0223912117636, 
308.0223912117636, 308.0223912117636, 308.0223912117636, 308.0223912117636], 
"eval_len": [60, 60, 60, 60, 60, 60, 60, 60, 60, 60]}

 58%|█████▊    | 579999/1000000 [9:37:00<5:10:31, 22.54it/s]global step 580000, trans_decision ep_re 376.075696806755

{"global_step": 580000, "eval_re": [376.075696806755, 376.075696806755, 
376.075696806755, 376.075696806755, 376.075696806755, 376.075696806755, 
376.075696806755, 376.075696806755, 376.075696806755, 376.075696806755], 
"eval_len": [70, 70, 70, 70, 70, 70, 70, 70, 70, 70]}

 59%|█████▉    | 589999/1000000 [9:47:00<5:05:58, 22.33it/s]global step 590000, trans_decision ep_re 322.30448589538224

{"global_step": 590000, "eval_re": [322.30448589538224, 322.30448589538224, 
322.30448589538224, 322.30448589538224, 322.30448589538224, 322.30448589538224, 
322.30448589538224, 322.30448589538224, 322.30448589538224, 322.30448589538224],
"eval_len": [63, 63, 63, 63, 63, 63, 63, 63, 63, 63]}

 60%|█████▉    | 599999/1000000 [9:57:00<4:56:31, 22.48it/s]global step 600000, trans_decision ep_re 357.37527603084015

{"global_step": 600000, "eval_re": [357.3752760308401, 357.3752760308401, 
357.3752760308401, 357.3752760308401, 357.3752760308401, 357.3752760308401, 
357.3752760308401, 357.3752760308401, 357.3752760308401, 357.3752760308401], 
"eval_len": [67, 67, 67, 67, 67, 67, 67, 67, 67, 67]}

 61%|██████    | 609999/1000000 [10:07:00<4:47:50, 22.58it/s]global step 610000, trans_decision ep_re 435.75801877223495

{"global_step": 610000, "eval_re": [435.7580187722349, 435.7580187722349, 
435.7580187722349, 435.7580187722349, 435.7580187722349, 435.7580187722349, 
435.7580187722349, 435.7580187722349, 435.7580187722349, 435.7580187722349], 
"eval_len": [80, 80, 80, 80, 80, 80, 80, 80, 80, 80]}

 62%|██████▏   | 619999/1000000 [10:17:00<4:42:10, 22.45it/s]global step 620000, trans_decision ep_re 340.416654055968

{"global_step": 620000, "eval_re": [340.416654055968, 340.416654055968, 
340.416654055968, 340.416654055968, 340.416654055968, 340.416654055968, 
340.416654055968, 340.416654055968, 340.416654055968, 340.416654055968], 
"eval_len": [64, 64, 64, 64, 64, 64, 64, 64, 64, 64]}

 63%|██████▎   | 629999/1000000 [10:26:50<4:35:51, 22.35it/s]global step 630000, trans_decision ep_re 373.4661056481758

{"global_step": 630000, "eval_re": [373.46610564817576, 373.46610564817576, 
373.46610564817576, 373.46610564817576, 373.46610564817576, 373.46610564817576, 
373.46610564817576, 373.46610564817576, 373.46610564817576, 373.46610564817576],
"eval_len": [70, 70, 70, 70, 70, 70, 70, 70, 70, 70]}

 64%|██████▍   | 639999/1000000 [10:37:10<4:31:04, 22.13it/s]global step 640000, trans_decision ep_re 313.03231509529917

{"global_step": 640000, "eval_re": [313.03231509529917, 313.03231509529917, 
313.03231509529917, 313.03231509529917, 313.03231509529917, 313.03231509529917, 
313.03231509529917, 313.03231509529917, 313.03231509529917, 313.03231509529917],
"eval_len": [61, 61, 61, 61, 61, 61, 61, 61, 61, 61]}

 65%|██████▍   | 649998/1000000 [10:47:20<4:20:46, 22.37it/s]global step 650000, trans_decision ep_re 321.9374591388718

{"global_step": 650000, "eval_re": [321.9374591388718, 321.9374591388718, 
321.9374591388718, 321.9374591388718, 321.9374591388718, 321.9374591388718, 
321.9374591388718, 321.9374591388718, 321.9374591388718, 321.9374591388718], 
"eval_len": [61, 61, 61, 61, 61, 61, 61, 61, 61, 61]}

 66%|██████▌   | 659999/1000000 [10:57:12<4:13:27, 22.36it/s]global step 660000, trans_decision ep_re 298.6804266897192

{"global_step": 660000, "eval_re": [298.6804266897192, 298.6804266897192, 
298.6804266897192, 298.6804266897192, 298.6804266897192, 298.6804266897192, 
298.6804266897192, 298.6804266897192, 298.6804266897192, 298.6804266897192], 
"eval_len": [57, 57, 57, 57, 57, 57, 57, 57, 57, 57]}

 67%|██████▋   | 669999/1000000 [11:07:30<4:06:54, 22.28it/s]global step 670000, trans_decision ep_re 332.37434452517374

{"global_step": 670000, "eval_re": [332.3743445251737, 332.3743445251737, 
332.3743445251737, 332.3743445251737, 332.3743445251737, 332.3743445251737, 
332.3743445251737, 332.3743445251737, 332.3743445251737, 332.3743445251737], 
"eval_len": [64, 64, 64, 64, 64, 64, 64, 64, 64, 64]}

 68%|██████▊   | 679999/1000000 [11:17:21<3:58:34, 22.36it/s]global step 680000, trans_decision ep_re 292.1807280084932

{"global_step": 680000, "eval_re": [292.1807280084932, 292.1807280084932, 
292.1807280084932, 292.1807280084932, 292.1807280084932, 292.1807280084932, 
292.1807280084932, 292.1807280084932, 292.1807280084932, 292.1807280084932], 
"eval_len": [56, 56, 56, 56, 56, 56, 56, 56, 56, 56]}

 69%|██████▉   | 689997/1000000 [11:27:40<3:51:54, 22.28it/s]global step 690000, trans_decision ep_re 321.58607865820215

{"global_step": 690000, "eval_re": [321.5860786582022, 321.5860786582022, 
321.5860786582022, 321.5860786582022, 321.5860786582022, 321.5860786582022, 
321.5860786582022, 321.5860786582022, 321.5860786582022, 321.5860786582022], 
"eval_len": [61, 61, 61, 61, 61, 61, 61, 61, 61, 61]}

 70%|██████▉   | 699999/1000000 [11:37:40<3:40:24, 22.69it/s]global step 700000, trans_decision ep_re 353.2123712740146

{"global_step": 700000, "eval_re": [353.21237127401463, 353.21237127401463, 
353.21237127401463, 353.21237127401463, 353.21237127401463, 353.21237127401463, 
353.21237127401463, 353.21237127401463, 353.21237127401463, 353.21237127401463],
"eval_len": [66, 66, 66, 66, 66, 66, 66, 66, 66, 66]}

 71%|███████   | 709997/1000000 [11:47:40<3:33:53, 22.60it/s]global step 710000, trans_decision ep_re 358.3566273083051

{"global_step": 710000, "eval_re": [358.3566273083051, 358.3566273083051, 
358.3566273083051, 358.3566273083051, 358.3566273083051, 358.3566273083051, 
358.3566273083051, 358.3566273083051, 358.3566273083051, 358.3566273083051], 
"eval_len": [68, 68, 68, 68, 68, 68, 68, 68, 68, 68]}

 72%|███████▏  | 719999/1000000 [11:57:22<3:25:18, 22.73it/s]global step 720000, trans_decision ep_re 337.4170900499465

{"global_step": 720000, "eval_re": [337.4170900499465, 337.4170900499465, 
337.4170900499465, 337.4170900499465, 337.4170900499465, 337.4170900499465, 
337.4170900499465, 337.4170900499465, 337.4170900499465, 337.4170900499465], 
"eval_len": [65, 65, 65, 65, 65, 65, 65, 65, 65, 65]}

 73%|███████▎  | 729997/1000000 [12:07:40<3:16:32, 22.90it/s]global step 730000, trans_decision ep_re 441.0129624999648

{"global_step": 730000, "eval_re": [441.0129624999648, 441.0129624999648, 
441.0129624999648, 441.0129624999648, 441.0129624999648, 441.0129624999648, 
441.0129624999648, 441.0129624999648, 441.0129624999648, 441.0129624999648], 
"eval_len": [81, 81, 81, 81, 81, 81, 81, 81, 81, 81]}

 74%|███████▍  | 739999/1000000 [12:17:40<3:12:01, 22.57it/s]global step 740000, trans_decision ep_re 416.429635741949

{"global_step": 740000, "eval_re": [416.42963574194897, 416.42963574194897, 
416.42963574194897, 416.42963574194897, 416.42963574194897, 416.42963574194897, 
416.42963574194897, 416.42963574194897, 416.42963574194897, 416.42963574194897],
"eval_len": [77, 77, 77, 77, 77, 77, 77, 77, 77, 77]}

 75%|███████▍  | 749999/1000000 [12:27:40<3:03:58, 22.65it/s]global step 750000, trans_decision ep_re 373.9636274100403

{"global_step": 750000, "eval_re": [373.9636274100403, 373.9636274100403, 
373.9636274100403, 373.9636274100403, 373.9636274100403, 373.9636274100403, 
373.9636274100403, 373.9636274100403, 373.9636274100403, 373.9636274100403], 
"eval_len": [69, 69, 69, 69, 69, 69, 69, 69, 69, 69]}

 76%|███████▌  | 759999/1000000 [12:37:40<2:55:39, 22.77it/s]global step 760000, trans_decision ep_re 336.46924843001864

{"global_step": 760000, "eval_re": [336.4692484300187, 336.4692484300187, 
336.4692484300187, 336.4692484300187, 336.4692484300187, 336.4692484300187, 
336.4692484300187, 336.4692484300187, 336.4692484300187, 336.4692484300187], 
"eval_len": [64, 64, 64, 64, 64, 64, 64, 64, 64, 64]}

 77%|███████▋  | 769997/1000000 [12:47:21<2:48:43, 22.72it/s]global step 770000, trans_decision ep_re 385.60354532152144

{"global_step": 770000, "eval_re": [385.6035453215215, 385.6035453215215, 
385.6035453215215, 385.6035453215215, 385.6035453215215, 385.6035453215215, 
385.6035453215215, 385.6035453215215, 385.6035453215215, 385.6035453215215], 
"eval_len": [73, 73, 73, 73, 73, 73, 73, 73, 73, 73]}

 78%|███████▊  | 779999/1000000 [12:57:30<2:41:34, 22.69it/s]global step 780000, trans_decision ep_re 424.8955021795894

{"global_step": 780000, "eval_re": [424.8955021795894, 424.8955021795894, 
424.8955021795894, 424.8955021795894, 424.8955021795894, 424.8955021795894, 
424.8955021795894, 424.8955021795894, 424.8955021795894, 424.8955021795894], 
"eval_len": [78, 78, 78, 78, 78, 78, 78, 78, 78, 78]}

 79%|███████▉  | 789999/1000000 [13:07:30<2:35:46, 22.47it/s]global step 790000, trans_decision ep_re 323.5570952941298

{"global_step": 790000, "eval_re": [323.5570952941298, 323.5570952941298, 
323.5570952941298, 323.5570952941298, 323.5570952941298, 323.5570952941298, 
323.5570952941298, 323.5570952941298, 323.5570952941298, 323.5570952941298], 
"eval_len": [61, 61, 61, 61, 61, 61, 61, 61, 61, 61]}

 80%|███████▉  | 799998/1000000 [13:17:21<2:26:23, 22.77it/s]global step 800000, trans_decision ep_re 288.20840379639554

{"global_step": 800000, "eval_re": [288.20840379639554, 288.20840379639554, 
288.20840379639554, 288.20840379639554, 288.20840379639554, 288.20840379639554, 
288.20840379639554, 288.20840379639554, 288.20840379639554, 288.20840379639554],
"eval_len": [55, 55, 55, 55, 55, 55, 55, 55, 55, 55]}

 81%|████████  | 809999/1000000 [13:27:30<2:19:42, 22.67it/s]global step 810000, trans_decision ep_re 410.015049338953

{"global_step": 810000, "eval_re": [410.015049338953, 410.015049338953, 
410.015049338953, 410.015049338953, 410.015049338953, 410.015049338953, 
410.015049338953, 410.015049338953, 410.015049338953, 410.015049338953], 
"eval_len": [75, 75, 75, 75, 75, 75, 75, 75, 75, 75]}

 82%|████████▏ | 819999/1000000 [13:37:40<2:14:32, 22.30it/s]global step 820000, trans_decision ep_re 384.31235027333497

{"global_step": 820000, "eval_re": [384.3123502733349, 384.3123502733349, 
384.3123502733349, 384.3123502733349, 384.3123502733349, 384.3123502733349, 
384.3123502733349, 384.3123502733349, 384.3123502733349, 384.3123502733349], 
"eval_len": [70, 70, 70, 70, 70, 70, 70, 70, 70, 70]}

 83%|████████▎ | 829999/1000000 [13:47:40<2:05:16, 22.62it/s]global step 830000, trans_decision ep_re 385.36077565156774

{"global_step": 830000, "eval_re": [385.3607756515678, 385.3607756515678, 
385.3607756515678, 385.3607756515678, 385.3607756515678, 385.3607756515678, 
385.3607756515678, 385.3607756515678, 385.3607756515678, 385.3607756515678], 
"eval_len": [69, 69, 69, 69, 69, 69, 69, 69, 69, 69]}

 84%|████████▍ | 839998/1000000 [13:57:50<1:57:45, 22.65it/s]global step 840000, trans_decision ep_re 353.5703828586353

{"global_step": 840000, "eval_re": [353.5703828586353, 353.5703828586353, 
353.5703828586353, 353.5703828586353, 353.5703828586353, 353.5703828586353, 
353.5703828586353, 353.5703828586353, 353.5703828586353, 353.5703828586353], 
"eval_len": [65, 65, 65, 65, 65, 65, 65, 65, 65, 65]}

 85%|████████▍ | 849999/1000000 [14:07:50<1:50:31, 22.62it/s]global step 850000, trans_decision ep_re 464.39124810908305

{"global_step": 850000, "eval_re": [464.3912481090831, 464.3912481090831, 
464.3912481090831, 464.3912481090831, 464.3912481090831, 464.3912481090831, 
464.3912481090831, 464.3912481090831, 464.3912481090831, 464.3912481090831], 
"eval_len": [82, 82, 82, 82, 82, 82, 82, 82, 82, 82]}

 86%|████████▌ | 859999/1000000 [14:17:50<1:43:29, 22.55it/s]global step 860000, trans_decision ep_re 383.55582966095807

{"global_step": 860000, "eval_re": [383.5558296609581, 383.5558296609581, 
383.5558296609581, 383.5558296609581, 383.5558296609581, 383.5558296609581, 
383.5558296609581, 383.5558296609581, 383.5558296609581, 383.5558296609581], 
"eval_len": [69, 69, 69, 69, 69, 69, 69, 69, 69, 69]}

 87%|████████▋ | 869998/1000000 [14:28:00<1:36:03, 22.55it/s]global step 870000, trans_decision ep_re 412.6285747087515

{"global_step": 870000, "eval_re": [412.62857470875156, 412.62857470875156, 
412.62857470875156, 412.62857470875156, 412.62857470875156, 412.62857470875156, 
412.62857470875156, 412.62857470875156, 412.62857470875156, 412.62857470875156],
"eval_len": [74, 74, 74, 74, 74, 74, 74, 74, 74, 74]}

 88%|████████▊ | 879999/1000000 [14:38:00<1:29:26, 22.36it/s]global step 880000, trans_decision ep_re 377.5068877571789

{"global_step": 880000, "eval_re": [377.5068877571789, 377.5068877571789, 
377.5068877571789, 377.5068877571789, 377.5068877571789, 377.5068877571789, 
377.5068877571789, 377.5068877571789, 377.5068877571789, 377.5068877571789], 
"eval_len": [68, 68, 68, 68, 68, 68, 68, 68, 68, 68]}

 89%|████████▉ | 889998/1000000 [14:48:10<1:20:55, 22.66it/s]global step 890000, trans_decision ep_re 381.30348847940047

{"global_step": 890000, "eval_re": [381.30348847940047, 381.30348847940047, 
381.30348847940047, 381.30348847940047, 381.30348847940047, 381.30348847940047, 
381.30348847940047, 381.30348847940047, 381.30348847940047, 381.30348847940047],
"eval_len": [69, 69, 69, 69, 69, 69, 69, 69, 69, 69]}

 90%|████████▉ | 899999/1000000 [14:58:10<1:14:52, 22.26it/s]global step 900000, trans_decision ep_re 366.7842529492376

{"global_step": 900000, "eval_re": [366.7842529492376, 366.7842529492376, 
366.7842529492376, 366.7842529492376, 366.7842529492376, 366.7842529492376, 
366.7842529492376, 366.7842529492376, 366.7842529492376, 366.7842529492376], 
"eval_len": [67, 67, 67, 67, 67, 67, 67, 67, 67, 67]}

 91%|█████████ | 909997/1000000 [15:08:20<1:06:40, 22.50it/s]global step 910000, trans_decision ep_re 369.6128638031467

{"global_step": 910000, "eval_re": [369.61286380314664, 369.61286380314664, 
369.61286380314664, 369.61286380314664, 369.61286380314664, 369.61286380314664, 
369.61286380314664, 369.61286380314664, 369.61286380314664, 369.61286380314664],
"eval_len": [68, 68, 68, 68, 68, 68, 68, 68, 68, 68]}

 92%|█████████▏| 919999/1000000 [15:18:20<59:21, 22.46it/s]global step 920000, trans_decision ep_re 382.88651187580274

{"global_step": 920000, "eval_re": [382.8865118758028, 382.8865118758028, 
382.8865118758028, 382.8865118758028, 382.8865118758028, 382.8865118758028, 
382.8865118758028, 382.8865118758028, 382.8865118758028, 382.8865118758028], 
"eval_len": [70, 70, 70, 70, 70, 70, 70, 70, 70, 70]}

 93%|█████████▎| 929999/1000000 [15:28:20<52:17, 22.31it/s]global step 930000, trans_decision ep_re 421.14760632176313

{"global_step": 930000, "eval_re": [421.14760632176313, 421.14760632176313, 
421.14760632176313, 421.14760632176313, 421.14760632176313, 421.14760632176313, 
421.14760632176313, 421.14760632176313, 421.14760632176313, 421.14760632176313],
"eval_len": [77, 77, 77, 77, 77, 77, 77, 77, 77, 77]}

 94%|█████████▍| 939998/1000000 [15:38:30<44:27, 22.49it/s]global step 940000, trans_decision ep_re 328.6000884600604

{"global_step": 940000, "eval_re": [328.6000884600605, 328.6000884600605, 
328.6000884600605, 328.6000884600605, 328.6000884600605, 328.6000884600605, 
328.6000884600605, 328.6000884600605, 328.6000884600605, 328.6000884600605], 
"eval_len": [61, 61, 61, 61, 61, 61, 61, 61, 61, 61]}

 95%|█████████▍| 949999/1000000 [15:48:30<37:12, 22.40it/s]global step 950000, trans_decision ep_re 321.1347530138055

{"global_step": 950000, "eval_re": [321.1347530138055, 321.1347530138055, 
321.1347530138055, 321.1347530138055, 321.1347530138055, 321.1347530138055, 
321.1347530138055, 321.1347530138055, 321.1347530138055, 321.1347530138055], 
"eval_len": [61, 61, 61, 61, 61, 61, 61, 61, 61, 61]}

 96%|█████████▌| 959998/1000000 [15:58:40<29:10, 22.85it/s]global step 960000, trans_decision ep_re 305.7135866428697

{"global_step": 960000, "eval_re": [305.71358664286964, 305.71358664286964, 
305.71358664286964, 305.71358664286964, 305.71358664286964, 305.71358664286964, 
305.71358664286964, 305.71358664286964, 305.71358664286964, 305.71358664286964],
"eval_len": [58, 58, 58, 58, 58, 58, 58, 58, 58, 58]}

 97%|█████████▋| 969999/1000000 [16:08:41<22:13, 22.50it/s]global step 970000, trans_decision ep_re 407.5861809699032

{"global_step": 970000, "eval_re": [407.5861809699032, 407.5861809699032, 
407.5861809699032, 407.5861809699032, 407.5861809699032, 407.5861809699032, 
407.5861809699032, 407.5861809699032, 407.5861809699032, 407.5861809699032], 
"eval_len": [74, 74, 74, 74, 74, 74, 74, 74, 74, 74]}

 98%|█████████▊| 979999/1000000 [16:18:41<14:54, 22.35it/s]global step 980000, trans_decision ep_re 304.5096930709431

{"global_step": 980000, "eval_re": [304.50969307094306, 304.50969307094306, 
304.50969307094306, 304.50969307094306, 304.50969307094306, 304.50969307094306, 
304.50969307094306, 304.50969307094306, 304.50969307094306, 304.50969307094306],
"eval_len": [57, 57, 57, 57, 57, 57, 57, 57, 57, 57]}

 99%|█████████▉| 989999/1000000 [16:28:41<07:19, 22.76it/s]global step 990000, trans_decision ep_re 335.57955562587614

{"global_step": 990000, "eval_re": [335.5795556258761, 335.5795556258761, 
335.5795556258761, 335.5795556258761, 335.5795556258761, 335.5795556258761, 
335.5795556258761, 335.5795556258761, 335.5795556258761, 335.5795556258761], 
"eval_len": [62, 62, 62, 62, 62, 62, 62, 62, 62, 62]}

100%|█████████▉| 999998/1000000 [16:38:41<00:00, 22.87it/s]global step 1000000, trans_decision ep_re 316.109178826424

{"global_step": 1000000, "eval_re": [316.10917882642406, 316.10917882642406, 
316.10917882642406, 316.10917882642406, 316.10917882642406, 316.10917882642406, 
316.10917882642406, 316.10917882642406, 316.10917882642406, 316.10917882642406],
"eval_len": [59, 59, 59, 59, 59, 59, 59, 59, 59, 59]}

100%|██████████| 1000000/1000000 [16:38:48<00:00, 16.69it/s]
