
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.0
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9997/1000000 [04:30<10:50:08, 25.38it/s]global step 10000, trans_decision ep_re 1059.7110556898201

{"global_step": 10000, "eval_re": [1059.71105568982, 1059.71105568982, 
1059.71105568982, 1059.71105568982, 1059.71105568982, 1059.71105568982, 
1059.71105568982, 1059.71105568982, 1059.71105568982, 1059.71105568982], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19999/1000000 [13:31<10:46:09, 25.28it/s]global step 20000, trans_decision ep_re 262.99982666472386

{"global_step": 20000, "eval_re": [262.99982666472386, 262.99982666472386, 
262.99982666472386, 262.99982666472386, 262.99982666472386, 262.99982666472386, 
262.99982666472386, 262.99982666472386, 262.99982666472386, 262.99982666472386],
"eval_len": [160, 160, 160, 160, 160, 160, 160, 160, 160, 160]}

  3%|▎         | 29999/1000000 [22:30<10:33:31, 25.52it/s]global step 30000, trans_decision ep_re 83.92789585082815

{"global_step": 30000, "eval_re": [83.92789585082815, 83.92789585082815, 
83.92789585082815, 83.92789585082815, 83.92789585082815, 83.92789585082815, 
83.92789585082815, 83.92789585082815, 83.92789585082815, 83.92789585082815], 
"eval_len": [167, 167, 167, 167, 167, 167, 167, 167, 167, 167]}

  4%|▍         | 39998/1000000 [31:20<10:17:56, 25.89it/s]global step 40000, trans_decision ep_re 192.3090515502271

{"global_step": 40000, "eval_re": [192.30905155022708, 192.30905155022708, 
192.30905155022708, 192.30905155022708, 192.30905155022708, 192.30905155022708, 
192.30905155022708, 192.30905155022708, 192.30905155022708, 192.30905155022708],
"eval_len": [120, 120, 120, 120, 120, 120, 120, 120, 120, 120]}

  5%|▍         | 49999/1000000 [40:00<10:25:03, 25.33it/s]global step 50000, trans_decision ep_re 377.3074158031212

{"global_step": 50000, "eval_re": [377.30741580312116, 377.30741580312116, 
377.30741580312116, 377.30741580312116, 377.30741580312116, 377.30741580312116, 
377.30741580312116, 377.30741580312116, 377.30741580312116, 377.30741580312116],
"eval_len": [294, 294, 294, 294, 294, 294, 294, 294, 294, 294]}

  6%|▌         | 59999/1000000 [48:50<10:18:15, 25.34it/s]global step 60000, trans_decision ep_re 389.98938985522136

{"global_step": 60000, "eval_re": [389.98938985522136, 389.98938985522136, 
389.98938985522136, 389.98938985522136, 389.98938985522136, 389.98938985522136, 
389.98938985522136, 389.98938985522136, 389.98938985522136, 389.98938985522136],
"eval_len": [278, 278, 278, 278, 278, 278, 278, 278, 278, 278]}

  7%|▋         | 69999/1000000 [57:40<10:07:37, 25.51it/s]global step 70000, trans_decision ep_re 280.2004634671719

{"global_step": 70000, "eval_re": [280.20046346717186, 280.20046346717186, 
280.20046346717186, 280.20046346717186, 280.20046346717186, 280.20046346717186, 
280.20046346717186, 280.20046346717186, 280.20046346717186, 280.20046346717186],
"eval_len": [132, 132, 132, 132, 132, 132, 132, 132, 132, 132]}

  8%|▊         | 79999/1000000 [1:06:12<10:10:18, 25.12it/s]global step 80000, trans_decision ep_re 159.88314539549629

{"global_step": 80000, "eval_re": [159.88314539549629, 159.88314539549629, 
159.88314539549629, 159.88314539549629, 159.88314539549629, 159.88314539549629, 
159.88314539549629, 159.88314539549629, 159.88314539549629, 159.88314539549629],
"eval_len": [93, 93, 93, 93, 93, 93, 93, 93, 93, 93]}

  9%|▉         | 89997/1000000 [1:15:10<9:56:10, 25.44it/s]global step 90000, trans_decision ep_re 148.9765157982354

{"global_step": 90000, "eval_re": [148.9765157982354, 148.9765157982354, 
148.9765157982354, 148.9765157982354, 148.9765157982354, 148.9765157982354, 
148.9765157982354, 148.9765157982354, 148.9765157982354, 148.9765157982354], 
"eval_len": [115, 115, 115, 115, 115, 115, 115, 115, 115, 115]}

 10%|▉         | 99998/1000000 [1:24:00<9:47:52, 25.52it/s]global step 100000, trans_decision ep_re 218.38199183039123

{"global_step": 100000, "eval_re": [218.38199183039126, 218.38199183039126, 
218.38199183039126, 218.38199183039126, 218.38199183039126, 218.38199183039126, 
218.38199183039126, 218.38199183039126, 218.38199183039126, 218.38199183039126],
"eval_len": [132, 132, 132, 132, 132, 132, 132, 132, 132, 132]}

 11%|█         | 109998/1000000 [1:32:33<9:43:20, 25.43it/s]global step 110000, trans_decision ep_re 43.65939500929106

{"global_step": 110000, "eval_re": [43.65939500929105, 43.65939500929105, 
43.65939500929105, 43.65939500929105, 43.65939500929105, 43.65939500929105, 
43.65939500929105, 43.65939500929105, 43.65939500929105, 43.65939500929105], 
"eval_len": [49, 49, 49, 49, 49, 49, 49, 49, 49, 49]}

 12%|█▏        | 119999/1000000 [1:41:30<9:38:01, 25.37it/s]global step 120000, trans_decision ep_re 831.6116587379053

{"global_step": 120000, "eval_re": [831.6116587379053, 831.6116587379053, 
831.6116587379053, 831.6116587379053, 831.6116587379053, 831.6116587379053, 
831.6116587379053, 831.6116587379053, 831.6116587379053, 831.6116587379053], 
"eval_len": [432, 432, 432, 432, 432, 432, 432, 432, 432, 432]}

 13%|█▎        | 129997/1000000 [1:50:30<9:35:00, 25.22it/s]global step 130000, trans_decision ep_re 182.21979081789544

{"global_step": 130000, "eval_re": [182.21979081789542, 182.21979081789542, 
182.21979081789542, 182.21979081789542, 182.21979081789542, 182.21979081789542, 
182.21979081789542, 182.21979081789542, 182.21979081789542, 182.21979081789542],
"eval_len": [112, 112, 112, 112, 112, 112, 112, 112, 112, 112]}

 14%|█▍        | 139998/1000000 [1:59:20<9:21:01, 25.55it/s]global step 140000, trans_decision ep_re 171.9479956515074

{"global_step": 140000, "eval_re": [171.9479956515074, 171.9479956515074, 
171.9479956515074, 171.9479956515074, 171.9479956515074, 171.9479956515074, 
171.9479956515074, 171.9479956515074, 171.9479956515074, 171.9479956515074], 
"eval_len": [91, 91, 91, 91, 91, 91, 91, 91, 91, 91]}

 15%|█▍        | 149998/1000000 [2:08:10<9:10:55, 25.71it/s]global step 150000, trans_decision ep_re 447.61367380896945

{"global_step": 150000, "eval_re": [447.61367380896945, 447.61367380896945, 
447.61367380896945, 447.61367380896945, 447.61367380896945, 447.61367380896945, 
447.61367380896945, 447.61367380896945, 447.61367380896945, 447.61367380896945],
"eval_len": [170, 170, 170, 170, 170, 170, 170, 170, 170, 170]}

 16%|█▌        | 159998/1000000 [2:16:43<9:04:50, 25.70it/s]global step 160000, trans_decision ep_re 42.47339589633965

{"global_step": 160000, "eval_re": [42.47339589633966, 42.47339589633966, 
42.47339589633966, 42.47339589633966, 42.47339589633966, 42.47339589633966, 
42.47339589633966, 42.47339589633966, 42.47339589633966, 42.47339589633966], 
"eval_len": [44, 44, 44, 44, 44, 44, 44, 44, 44, 44]}

 17%|█▋        | 169999/1000000 [2:25:40<9:14:22, 24.95it/s]global step 170000, trans_decision ep_re 512.8098085819394

{"global_step": 170000, "eval_re": [512.8098085819394, 512.8098085819394, 
512.8098085819394, 512.8098085819394, 512.8098085819394, 512.8098085819394, 
512.8098085819394, 512.8098085819394, 512.8098085819394, 512.8098085819394], 
"eval_len": [199, 199, 199, 199, 199, 199, 199, 199, 199, 199]}

 18%|█▊        | 179999/1000000 [2:34:21<8:59:03, 25.35it/s]global step 180000, trans_decision ep_re 274.8156039128417

{"global_step": 180000, "eval_re": [274.8156039128417, 274.8156039128417, 
274.8156039128417, 274.8156039128417, 274.8156039128417, 274.8156039128417, 
274.8156039128417, 274.8156039128417, 274.8156039128417, 274.8156039128417], 
"eval_len": [129, 129, 129, 129, 129, 129, 129, 129, 129, 129]}

 19%|█▉        | 189999/1000000 [2:43:30<9:14:07, 24.36it/s]global step 190000, trans_decision ep_re 216.67642050609143

{"global_step": 190000, "eval_re": [216.67642050609143, 216.67642050609143, 
216.67642050609143, 216.67642050609143, 216.67642050609143, 216.67642050609143, 
216.67642050609143, 216.67642050609143, 216.67642050609143, 216.67642050609143],
"eval_len": [165, 165, 165, 165, 165, 165, 165, 165, 165, 165]}

 20%|█▉        | 199998/1000000 [2:52:20<8:50:09, 25.15it/s]global step 200000, trans_decision ep_re 669.1626567329386

{"global_step": 200000, "eval_re": [669.1626567329386, 669.1626567329386, 
669.1626567329386, 669.1626567329386, 669.1626567329386, 669.1626567329386, 
669.1626567329386, 669.1626567329386, 669.1626567329386, 669.1626567329386], 
"eval_len": [241, 241, 241, 241, 241, 241, 241, 241, 241, 241]}

 21%|██        | 209997/1000000 [3:01:10<8:42:57, 25.18it/s]global step 210000, trans_decision ep_re 41.32468530990492

{"global_step": 210000, "eval_re": [41.32468530990492, 41.32468530990492, 
41.32468530990492, 41.32468530990492, 41.32468530990492, 41.32468530990492, 
41.32468530990492, 41.32468530990492, 41.32468530990492, 41.32468530990492], 
"eval_len": [44, 44, 44, 44, 44, 44, 44, 44, 44, 44]}

 22%|██▏       | 219998/1000000 [3:10:00<8:37:58, 25.10it/s]global step 220000, trans_decision ep_re 333.9426838812543

{"global_step": 220000, "eval_re": [333.9426838812543, 333.9426838812543, 
333.9426838812543, 333.9426838812543, 333.9426838812543, 333.9426838812543, 
333.9426838812543, 333.9426838812543, 333.9426838812543, 333.9426838812543], 
"eval_len": [149, 149, 149, 149, 149, 149, 149, 149, 149, 149]}

 23%|██▎       | 229998/1000000 [3:18:50<8:27:07, 25.31it/s]global step 230000, trans_decision ep_re 366.8205040592592

{"global_step": 230000, "eval_re": [366.8205040592592, 366.8205040592592, 
366.8205040592592, 366.8205040592592, 366.8205040592592, 366.8205040592592, 
366.8205040592592, 366.8205040592592, 366.8205040592592, 366.8205040592592], 
"eval_len": [168, 168, 168, 168, 168, 168, 168, 168, 168, 168]}

 24%|██▍       | 239998/1000000 [3:27:40<8:15:07, 25.58it/s]global step 240000, trans_decision ep_re 779.5811765991141

{"global_step": 240000, "eval_re": [779.5811765991142, 779.5811765991142, 
779.5811765991142, 779.5811765991142, 779.5811765991142, 779.5811765991142, 
779.5811765991142, 779.5811765991142, 779.5811765991142, 779.5811765991142], 
"eval_len": [279, 279, 279, 279, 279, 279, 279, 279, 279, 279]}

 25%|██▍       | 249999/1000000 [3:36:30<8:27:29, 24.63it/s]global step 250000, trans_decision ep_re 784.6561337911571

{"global_step": 250000, "eval_re": [784.6561337911571, 784.6561337911571, 
784.6561337911571, 784.6561337911571, 784.6561337911571, 784.6561337911571, 
784.6561337911571, 784.6561337911571, 784.6561337911571, 784.6561337911571], 
"eval_len": [253, 253, 253, 253, 253, 253, 253, 253, 253, 253]}

 26%|██▌       | 259999/1000000 [3:45:30<8:11:27, 25.10it/s]global step 260000, trans_decision ep_re 1154.2703771365593

{"global_step": 260000, "eval_re": [1154.2703771365593, 1154.2703771365593, 
1154.2703771365593, 1154.2703771365593, 1154.2703771365593, 1154.2703771365593, 
1154.2703771365593, 1154.2703771365593, 1154.2703771365593, 1154.2703771365593],
"eval_len": [426, 426, 426, 426, 426, 426, 426, 426, 426, 426]}

 27%|██▋       | 269999/1000000 [3:54:30<8:02:58, 25.19it/s]global step 270000, trans_decision ep_re 720.397386987763

{"global_step": 270000, "eval_re": [720.397386987763, 720.397386987763, 
720.397386987763, 720.397386987763, 720.397386987763, 720.397386987763, 
720.397386987763, 720.397386987763, 720.397386987763, 720.397386987763], 
"eval_len": [237, 237, 237, 237, 237, 237, 237, 237, 237, 237]}

 28%|██▊       | 279999/1000000 [4:03:20<7:57:17, 25.14it/s]global step 280000, trans_decision ep_re 732.1295097604007

{"global_step": 280000, "eval_re": [732.1295097604007, 732.1295097604007, 
732.1295097604007, 732.1295097604007, 732.1295097604007, 732.1295097604007, 
732.1295097604007, 732.1295097604007, 732.1295097604007, 732.1295097604007], 
"eval_len": [256, 256, 256, 256, 256, 256, 256, 256, 256, 256]}

 29%|██▉       | 289999/1000000 [4:12:10<7:46:11, 25.38it/s]global step 290000, trans_decision ep_re 984.0006038667259

{"global_step": 290000, "eval_re": [984.000603866726, 984.000603866726, 
984.000603866726, 984.000603866726, 984.000603866726, 984.000603866726, 
984.000603866726, 984.000603866726, 984.000603866726, 984.000603866726], 
"eval_len": [311, 311, 311, 311, 311, 311, 311, 311, 311, 311]}

 30%|██▉       | 299998/1000000 [4:21:00<7:42:55, 25.20it/s]global step 300000, trans_decision ep_re 471.8812795887267

{"global_step": 300000, "eval_re": [471.88127958872667, 471.88127958872667, 
471.88127958872667, 471.88127958872667, 471.88127958872667, 471.88127958872667, 
471.88127958872667, 471.88127958872667, 471.88127958872667, 471.88127958872667],
"eval_len": [181, 181, 181, 181, 181, 181, 181, 181, 181, 181]}

 31%|███       | 309999/1000000 [4:29:50<7:33:03, 25.38it/s]global step 310000, trans_decision ep_re 1134.45317807154

{"global_step": 310000, "eval_re": [1134.4531780715397, 1134.4531780715397, 
1134.4531780715397, 1134.4531780715397, 1134.4531780715397, 1134.4531780715397, 
1134.4531780715397, 1134.4531780715397, 1134.4531780715397, 1134.4531780715397],
"eval_len": [325, 325, 325, 325, 325, 325, 325, 325, 325, 325]}

 32%|███▏      | 319998/1000000 [4:38:40<7:23:53, 25.53it/s]global step 320000, trans_decision ep_re 684.5783993983796

{"global_step": 320000, "eval_re": [684.5783993983796, 684.5783993983796, 
684.5783993983796, 684.5783993983796, 684.5783993983796, 684.5783993983796, 
684.5783993983796, 684.5783993983796, 684.5783993983796, 684.5783993983796], 
"eval_len": [245, 245, 245, 245, 245, 245, 245, 245, 245, 245]}

 33%|███▎      | 329998/1000000 [4:47:30<7:15:06, 25.66it/s]global step 330000, trans_decision ep_re 663.6726132667452

{"global_step": 330000, "eval_re": [663.6726132667451, 663.6726132667451, 
663.6726132667451, 663.6726132667451, 663.6726132667451, 663.6726132667451, 
663.6726132667451, 663.6726132667451, 663.6726132667451, 663.6726132667451], 
"eval_len": [239, 239, 239, 239, 239, 239, 239, 239, 239, 239]}

 34%|███▍      | 339998/1000000 [4:56:20<7:06:36, 25.79it/s]global step 340000, trans_decision ep_re 884.137774603644

{"global_step": 340000, "eval_re": [884.137774603644, 884.137774603644, 
884.137774603644, 884.137774603644, 884.137774603644, 884.137774603644, 
884.137774603644, 884.137774603644, 884.137774603644, 884.137774603644], 
"eval_len": [290, 290, 290, 290, 290, 290, 290, 290, 290, 290]}

 35%|███▍      | 349998/1000000 [5:05:10<7:11:42, 25.09it/s]global step 350000, trans_decision ep_re 520.061694329446

{"global_step": 350000, "eval_re": [520.0616943294459, 520.0616943294459, 
520.0616943294459, 520.0616943294459, 520.0616943294459, 520.0616943294459, 
520.0616943294459, 520.0616943294459, 520.0616943294459, 520.0616943294459], 
"eval_len": [194, 194, 194, 194, 194, 194, 194, 194, 194, 194]}

 36%|███▌      | 359998/1000000 [5:14:00<6:53:32, 25.79it/s]global step 360000, trans_decision ep_re 409.37514208958345

{"global_step": 360000, "eval_re": [409.3751420895835, 409.3751420895835, 
409.3751420895835, 409.3751420895835, 409.3751420895835, 409.3751420895835, 
409.3751420895835, 409.3751420895835, 409.3751420895835, 409.3751420895835], 
"eval_len": [180, 180, 180, 180, 180, 180, 180, 180, 180, 180]}

 37%|███▋      | 369999/1000000 [5:22:50<6:55:50, 25.25it/s]global step 370000, trans_decision ep_re 1860.0209895119647

{"global_step": 370000, "eval_re": [1860.0209895119644, 1860.0209895119644, 
1860.0209895119644, 1860.0209895119644, 1860.0209895119644, 1860.0209895119644, 
1860.0209895119644, 1860.0209895119644, 1860.0209895119644, 1860.0209895119644],
"eval_len": [470, 470, 470, 470, 470, 470, 470, 470, 470, 470]}

 38%|███▊      | 379999/1000000 [5:31:40<6:49:25, 25.24it/s]global step 380000, trans_decision ep_re 876.0101274327533

{"global_step": 380000, "eval_re": [876.0101274327534, 876.0101274327534, 
876.0101274327534, 876.0101274327534, 876.0101274327534, 876.0101274327534, 
876.0101274327534, 876.0101274327534, 876.0101274327534, 876.0101274327534], 
"eval_len": [308, 308, 308, 308, 308, 308, 308, 308, 308, 308]}

 39%|███▉      | 389998/1000000 [5:40:30<6:34:43, 25.76it/s]global step 390000, trans_decision ep_re 480.3247708518571

{"global_step": 390000, "eval_re": [480.32477085185707, 480.32477085185707, 
480.32477085185707, 480.32477085185707, 480.32477085185707, 480.32477085185707, 
480.32477085185707, 480.32477085185707, 480.32477085185707, 480.32477085185707],
"eval_len": [194, 194, 194, 194, 194, 194, 194, 194, 194, 194]}

 40%|███▉      | 399999/1000000 [5:49:20<6:38:58, 25.06it/s]global step 400000, trans_decision ep_re 119.03672302358845

{"global_step": 400000, "eval_re": [119.03672302358848, 119.03672302358848, 
119.03672302358848, 119.03672302358848, 119.03672302358848, 119.03672302358848, 
119.03672302358848, 119.03672302358848, 119.03672302358848, 119.03672302358848],
"eval_len": [100, 100, 100, 100, 100, 100, 100, 100, 100, 100]}

 41%|████      | 409998/1000000 [5:58:10<6:26:34, 25.44it/s]global step 410000, trans_decision ep_re 1747.330773383736

{"global_step": 410000, "eval_re": [1747.3307733837357, 1747.3307733837357, 
1747.3307733837357, 1747.3307733837357, 1747.3307733837357, 1747.3307733837357, 
1747.3307733837357, 1747.3307733837357, 1747.3307733837357, 1747.3307733837357],
"eval_len": [465, 465, 465, 465, 465, 465, 465, 465, 465, 465]}

 42%|████▏     | 419999/1000000 [6:07:00<6:24:56, 25.11it/s]global step 420000, trans_decision ep_re 591.217293499065

{"global_step": 420000, "eval_re": [591.217293499065, 591.217293499065, 
591.217293499065, 591.217293499065, 591.217293499065, 591.217293499065, 
591.217293499065, 591.217293499065, 591.217293499065, 591.217293499065], 
"eval_len": [217, 217, 217, 217, 217, 217, 217, 217, 217, 217]}

 43%|████▎     | 429998/1000000 [6:15:50<6:08:54, 25.75it/s]global step 430000, trans_decision ep_re 217.0990268780319

{"global_step": 430000, "eval_re": [217.09902687803194, 217.09902687803194, 
217.09902687803194, 217.09902687803194, 217.09902687803194, 217.09902687803194, 
217.09902687803194, 217.09902687803194, 217.09902687803194, 217.09902687803194],
"eval_len": [133, 133, 133, 133, 133, 133, 133, 133, 133, 133]}

 44%|████▍     | 439997/1000000 [6:24:40<6:08:00, 25.36it/s]global step 440000, trans_decision ep_re 490.267755598922

{"global_step": 440000, "eval_re": [490.267755598922, 490.267755598922, 
490.267755598922, 490.267755598922, 490.267755598922, 490.267755598922, 
490.267755598922, 490.267755598922, 490.267755598922, 490.267755598922], 
"eval_len": [207, 207, 207, 207, 207, 207, 207, 207, 207, 207]}

 45%|████▍     | 449997/1000000 [6:33:30<6:01:30, 25.36it/s]global step 450000, trans_decision ep_re 1544.392530278946

{"global_step": 450000, "eval_re": [1544.3925302789457, 1544.3925302789457, 
1544.3925302789457, 1544.3925302789457, 1544.3925302789457, 1544.3925302789457, 
1544.3925302789457, 1544.3925302789457, 1544.3925302789457, 1544.3925302789457],
"eval_len": [500, 500, 500, 500, 500, 500, 500, 500, 500, 500]}

 46%|████▌     | 459998/1000000 [6:42:30<5:51:49, 25.58it/s]global step 460000, trans_decision ep_re 863.0663604909842

{"global_step": 460000, "eval_re": [863.0663604909842, 863.0663604909842, 
863.0663604909842, 863.0663604909842, 863.0663604909842, 863.0663604909842, 
863.0663604909842, 863.0663604909842, 863.0663604909842, 863.0663604909842], 
"eval_len": [280, 280, 280, 280, 280, 280, 280, 280, 280, 280]}

 47%|████▋     | 469997/1000000 [6:51:20<5:47:53, 25.39it/s]global step 470000, trans_decision ep_re 198.68969601294924

{"global_step": 470000, "eval_re": [198.68969601294924, 198.68969601294924, 
198.68969601294924, 198.68969601294924, 198.68969601294924, 198.68969601294924, 
198.68969601294924, 198.68969601294924, 198.68969601294924, 198.68969601294924],
"eval_len": [133, 133, 133, 133, 133, 133, 133, 133, 133, 133]}

 48%|████▊     | 479999/1000000 [7:00:10<5:47:33, 24.94it/s]global step 480000, trans_decision ep_re 518.8699599473223

{"global_step": 480000, "eval_re": [518.8699599473223, 518.8699599473223, 
518.8699599473223, 518.8699599473223, 518.8699599473223, 518.8699599473223, 
518.8699599473223, 518.8699599473223, 518.8699599473223, 518.8699599473223], 
"eval_len": [210, 210, 210, 210, 210, 210, 210, 210, 210, 210]}

 49%|████▉     | 489999/1000000 [7:09:00<5:37:25, 25.19it/s]global step 490000, trans_decision ep_re 560.7907908315268

{"global_step": 490000, "eval_re": [560.7907908315269, 560.7907908315269, 
560.7907908315269, 560.7907908315269, 560.7907908315269, 560.7907908315269, 
560.7907908315269, 560.7907908315269, 560.7907908315269, 560.7907908315269], 
"eval_len": [207, 207, 207, 207, 207, 207, 207, 207, 207, 207]}

 50%|████▉     | 499999/1000000 [7:17:50<5:31:02, 25.17it/s]global step 500000, trans_decision ep_re 1191.6378756184897

{"global_step": 500000, "eval_re": [1191.6378756184897, 1191.6378756184897, 
1191.6378756184897, 1191.6378756184897, 1191.6378756184897, 1191.6378756184897, 
1191.6378756184897, 1191.6378756184897, 1191.6378756184897, 1191.6378756184897],
"eval_len": [359, 359, 359, 359, 359, 359, 359, 359, 359, 359]}

 51%|█████     | 509998/1000000 [7:26:40<5:23:36, 25.24it/s]global step 510000, trans_decision ep_re 1384.2288328767022

{"global_step": 510000, "eval_re": [1384.228832876702, 1384.228832876702, 
1384.228832876702, 1384.228832876702, 1384.228832876702, 1384.228832876702, 
1384.228832876702, 1384.228832876702, 1384.228832876702, 1384.228832876702], 
"eval_len": [348, 348, 348, 348, 348, 348, 348, 348, 348, 348]}

 52%|█████▏    | 519999/1000000 [7:35:30<5:16:12, 25.30it/s]global step 520000, trans_decision ep_re 1805.3828023138892

{"global_step": 520000, "eval_re": [1805.3828023138892, 1805.3828023138892, 
1805.3828023138892, 1805.3828023138892, 1805.3828023138892, 1805.3828023138892, 
1805.3828023138892, 1805.3828023138892, 1805.3828023138892, 1805.3828023138892],
"eval_len": [449, 449, 449, 449, 449, 449, 449, 449, 449, 449]}

 53%|█████▎    | 529998/1000000 [7:44:30<5:08:15, 25.41it/s]global step 530000, trans_decision ep_re 414.11214033428575

{"global_step": 530000, "eval_re": [414.1121403342858, 414.1121403342858, 
414.1121403342858, 414.1121403342858, 414.1121403342858, 414.1121403342858, 
414.1121403342858, 414.1121403342858, 414.1121403342858, 414.1121403342858], 
"eval_len": [168, 168, 168, 168, 168, 168, 168, 168, 168, 168]}

 54%|█████▍    | 539999/1000000 [7:53:02<5:04:39, 25.16it/s]global step 540000, trans_decision ep_re 79.91792067846437

{"global_step": 540000, "eval_re": [79.91792067846437, 79.91792067846437, 
79.91792067846437, 79.91792067846437, 79.91792067846437, 79.91792067846437, 
79.91792067846437, 79.91792067846437, 79.91792067846437, 79.91792067846437], 
"eval_len": [85, 85, 85, 85, 85, 85, 85, 85, 85, 85]}

 55%|█████▍    | 549999/1000000 [8:02:00<4:58:30, 25.12it/s]global step 550000, trans_decision ep_re 752.2256082134561

{"global_step": 550000, "eval_re": [752.225608213456, 752.225608213456, 
752.225608213456, 752.225608213456, 752.225608213456, 752.225608213456, 
752.225608213456, 752.225608213456, 752.225608213456, 752.225608213456], 
"eval_len": [250, 250, 250, 250, 250, 250, 250, 250, 250, 250]}

 56%|█████▌    | 559999/1000000 [8:10:50<4:50:43, 25.22it/s]global step 560000, trans_decision ep_re 413.91392118533724

{"global_step": 560000, "eval_re": [413.9139211853372, 413.9139211853372, 
413.9139211853372, 413.9139211853372, 413.9139211853372, 413.9139211853372, 
413.9139211853372, 413.9139211853372, 413.9139211853372, 413.9139211853372], 
"eval_len": [167, 167, 167, 167, 167, 167, 167, 167, 167, 167]}

 57%|█████▋    | 569997/1000000 [8:19:40<4:44:53, 25.16it/s]global step 570000, trans_decision ep_re 556.0693943699731

{"global_step": 570000, "eval_re": [556.069394369973, 556.069394369973, 
556.069394369973, 556.069394369973, 556.069394369973, 556.069394369973, 
556.069394369973, 556.069394369973, 556.069394369973, 556.069394369973], 
"eval_len": [223, 223, 223, 223, 223, 223, 223, 223, 223, 223]}

 58%|█████▊    | 579997/1000000 [8:28:30<4:38:55, 25.10it/s]global step 580000, trans_decision ep_re 1931.5399477246078

{"global_step": 580000, "eval_re": [1931.539947724608, 1931.539947724608, 
1931.539947724608, 1931.539947724608, 1931.539947724608, 1931.539947724608, 
1931.539947724608, 1931.539947724608, 1931.539947724608, 1931.539947724608], 
"eval_len": [482, 482, 482, 482, 482, 482, 482, 482, 482, 482]}

 59%|█████▉    | 589999/1000000 [8:37:30<4:28:58, 25.41it/s]global step 590000, trans_decision ep_re 1121.0855634182649

{"global_step": 590000, "eval_re": [1121.0855634182649, 1121.0855634182649, 
1121.0855634182649, 1121.0855634182649, 1121.0855634182649, 1121.0855634182649, 
1121.0855634182649, 1121.0855634182649, 1121.0855634182649, 1121.0855634182649],
"eval_len": [324, 324, 324, 324, 324, 324, 324, 324, 324, 324]}

 60%|█████▉    | 599998/1000000 [8:46:20<4:20:59, 25.54it/s]global step 600000, trans_decision ep_re 1817.1392930260622

{"global_step": 600000, "eval_re": [1817.1392930260622, 1817.1392930260622, 
1817.1392930260622, 1817.1392930260622, 1817.1392930260622, 1817.1392930260622, 
1817.1392930260622, 1817.1392930260622, 1817.1392930260622, 1817.1392930260622],
"eval_len": [477, 477, 477, 477, 477, 477, 477, 477, 477, 477]}

 61%|██████    | 609997/1000000 [8:55:20<4:18:36, 25.14it/s]global step 610000, trans_decision ep_re 534.8121246104748

{"global_step": 610000, "eval_re": [534.8121246104748, 534.8121246104748, 
534.8121246104748, 534.8121246104748, 534.8121246104748, 534.8121246104748, 
534.8121246104748, 534.8121246104748, 534.8121246104748, 534.8121246104748], 
"eval_len": [212, 212, 212, 212, 212, 212, 212, 212, 212, 212]}

 62%|██████▏   | 619997/1000000 [9:04:10<4:15:44, 24.77it/s]global step 620000, trans_decision ep_re 996.6307577470028

{"global_step": 620000, "eval_re": [996.6307577470027, 996.6307577470027, 
996.6307577470027, 996.6307577470027, 996.6307577470027, 996.6307577470027, 
996.6307577470027, 996.6307577470027, 996.6307577470027, 996.6307577470027], 
"eval_len": [293, 293, 293, 293, 293, 293, 293, 293, 293, 293]}

 63%|██████▎   | 629999/1000000 [9:12:52<4:07:02, 24.96it/s]global step 630000, trans_decision ep_re 241.428454042415

{"global_step": 630000, "eval_re": [241.428454042415, 241.428454042415, 
241.428454042415, 241.428454042415, 241.428454042415, 241.428454042415, 
241.428454042415, 241.428454042415, 241.428454042415, 241.428454042415], 
"eval_len": [144, 144, 144, 144, 144, 144, 144, 144, 144, 144]}

 64%|██████▍   | 639999/1000000 [9:22:00<3:58:30, 25.16it/s]global step 640000, trans_decision ep_re 1388.7761276437407

{"global_step": 640000, "eval_re": [1388.7761276437404, 1388.7761276437404, 
1388.7761276437404, 1388.7761276437404, 1388.7761276437404, 1388.7761276437404, 
1388.7761276437404, 1388.7761276437404, 1388.7761276437404, 1388.7761276437404],
"eval_len": [356, 356, 356, 356, 356, 356, 356, 356, 356, 356]}

 65%|██████▍   | 649999/1000000 [9:30:50<3:53:28, 24.99it/s]global step 650000, trans_decision ep_re 516.0996379549243

{"global_step": 650000, "eval_re": [516.0996379549243, 516.0996379549243, 
516.0996379549243, 516.0996379549243, 516.0996379549243, 516.0996379549243, 
516.0996379549243, 516.0996379549243, 516.0996379549243, 516.0996379549243], 
"eval_len": [191, 191, 191, 191, 191, 191, 191, 191, 191, 191]}

 66%|██████▌   | 659999/1000000 [9:39:40<3:43:56, 25.30it/s]global step 660000, trans_decision ep_re 622.5763674500374

{"global_step": 660000, "eval_re": [622.5763674500374, 622.5763674500374, 
622.5763674500374, 622.5763674500374, 622.5763674500374, 622.5763674500374, 
622.5763674500374, 622.5763674500374, 622.5763674500374, 622.5763674500374], 
"eval_len": [220, 220, 220, 220, 220, 220, 220, 220, 220, 220]}

 67%|██████▋   | 669997/1000000 [9:48:30<3:37:58, 25.23it/s]global step 670000, trans_decision ep_re 88.29319639292575

{"global_step": 670000, "eval_re": [88.29319639292575, 88.29319639292575, 
88.29319639292575, 88.29319639292575, 88.29319639292575, 88.29319639292575, 
88.29319639292575, 88.29319639292575, 88.29319639292575, 88.29319639292575], 
"eval_len": [89, 89, 89, 89, 89, 89, 89, 89, 89, 89]}

 68%|██████▊   | 679999/1000000 [9:57:20<3:31:22, 25.23it/s]global step 680000, trans_decision ep_re 481.24867599470156

{"global_step": 680000, "eval_re": [481.24867599470156, 481.24867599470156, 
481.24867599470156, 481.24867599470156, 481.24867599470156, 481.24867599470156, 
481.24867599470156, 481.24867599470156, 481.24867599470156, 481.24867599470156],
"eval_len": [190, 190, 190, 190, 190, 190, 190, 190, 190, 190]}

 69%|██████▉   | 689998/1000000 [10:06:00<3:23:01, 25.45it/s]global step 690000, trans_decision ep_re 1113.007674269877

{"global_step": 690000, "eval_re": [1113.0076742698773, 1113.0076742698773, 
1113.0076742698773, 1113.0076742698773, 1113.0076742698773, 1113.0076742698773, 
1113.0076742698773, 1113.0076742698773, 1113.0076742698773, 1113.0076742698773],
"eval_len": [458, 458, 458, 458, 458, 458, 458, 458, 458, 458]}

 70%|██████▉   | 699997/1000000 [10:15:00<3:16:45, 25.41it/s]global step 700000, trans_decision ep_re 768.0589397349879

{"global_step": 700000, "eval_re": [768.058939734988, 768.058939734988, 
768.058939734988, 768.058939734988, 768.058939734988, 768.058939734988, 
768.058939734988, 768.058939734988, 768.058939734988, 768.058939734988], 
"eval_len": [247, 247, 247, 247, 247, 247, 247, 247, 247, 247]}

 71%|███████   | 709997/1000000 [10:23:50<3:12:35, 25.10it/s]global step 710000, trans_decision ep_re 1265.832344330846

{"global_step": 710000, "eval_re": [1265.832344330846, 1265.832344330846, 
1265.832344330846, 1265.832344330846, 1265.832344330846, 1265.832344330846, 
1265.832344330846, 1265.832344330846, 1265.832344330846, 1265.832344330846], 
"eval_len": [334, 334, 334, 334, 334, 334, 334, 334, 334, 334]}

 72%|███████▏  | 719999/1000000 [10:32:40<3:04:50, 25.25it/s]global step 720000, trans_decision ep_re 77.5993224181175

{"global_step": 720000, "eval_re": [77.5993224181175, 77.5993224181175, 
77.5993224181175, 77.5993224181175, 77.5993224181175, 77.5993224181175, 
77.5993224181175, 77.5993224181175, 77.5993224181175, 77.5993224181175], 
"eval_len": [87, 87, 87, 87, 87, 87, 87, 87, 87, 87]}

 73%|███████▎  | 729998/1000000 [10:41:30<2:54:39, 25.77it/s]global step 730000, trans_decision ep_re 522.7217607067876

{"global_step": 730000, "eval_re": [522.7217607067874, 522.7217607067874, 
522.7217607067874, 522.7217607067874, 522.7217607067874, 522.7217607067874, 
522.7217607067874, 522.7217607067874, 522.7217607067874, 522.7217607067874], 
"eval_len": [240, 240, 240, 240, 240, 240, 240, 240, 240, 240]}

 74%|███████▍  | 739997/1000000 [10:50:20<2:52:23, 25.14it/s]global step 740000, trans_decision ep_re 1159.7897255243784

{"global_step": 740000, "eval_re": [1159.7897255243781, 1159.7897255243781, 
1159.7897255243781, 1159.7897255243781, 1159.7897255243781, 1159.7897255243781, 
1159.7897255243781, 1159.7897255243781, 1159.7897255243781, 1159.7897255243781],
"eval_len": [319, 319, 319, 319, 319, 319, 319, 319, 319, 319]}

 75%|███████▍  | 749999/1000000 [10:59:10<2:44:59, 25.25it/s]global step 750000, trans_decision ep_re 735.3684222276731

{"global_step": 750000, "eval_re": [735.3684222276731, 735.3684222276731, 
735.3684222276731, 735.3684222276731, 735.3684222276731, 735.3684222276731, 
735.3684222276731, 735.3684222276731, 735.3684222276731, 735.3684222276731], 
"eval_len": [235, 235, 235, 235, 235, 235, 235, 235, 235, 235]}

 76%|███████▌  | 759999/1000000 [11:08:00<2:39:48, 25.03it/s]global step 760000, trans_decision ep_re 67.79736091660972

{"global_step": 760000, "eval_re": [67.79736091660972, 67.79736091660972, 
67.79736091660972, 67.79736091660972, 67.79736091660972, 67.79736091660972, 
67.79736091660972, 67.79736091660972, 67.79736091660972, 67.79736091660972], 
"eval_len": [76, 76, 76, 76, 76, 76, 76, 76, 76, 76]}

 77%|███████▋  | 769998/1000000 [11:16:50<2:29:12, 25.69it/s]global step 770000, trans_decision ep_re 863.0942406357693

{"global_step": 770000, "eval_re": [863.0942406357693, 863.0942406357693, 
863.0942406357693, 863.0942406357693, 863.0942406357693, 863.0942406357693, 
863.0942406357693, 863.0942406357693, 863.0942406357693, 863.0942406357693], 
"eval_len": [261, 261, 261, 261, 261, 261, 261, 261, 261, 261]}

 78%|███████▊  | 779998/1000000 [11:25:40<2:23:02, 25.64it/s]global step 780000, trans_decision ep_re 194.41772301611354

{"global_step": 780000, "eval_re": [194.41772301611354, 194.41772301611354, 
194.41772301611354, 194.41772301611354, 194.41772301611354, 194.41772301611354, 
194.41772301611354, 194.41772301611354, 194.41772301611354, 194.41772301611354],
"eval_len": [117, 117, 117, 117, 117, 117, 117, 117, 117, 117]}

 79%|███████▉  | 789999/1000000 [11:34:30<2:18:14, 25.32it/s]global step 790000, trans_decision ep_re 880.7209023870303

{"global_step": 790000, "eval_re": [880.7209023870302, 880.7209023870302, 
880.7209023870302, 880.7209023870302, 880.7209023870302, 880.7209023870302, 
880.7209023870302, 880.7209023870302, 880.7209023870302, 880.7209023870302], 
"eval_len": [280, 280, 280, 280, 280, 280, 280, 280, 280, 280]}

 80%|███████▉  | 799998/1000000 [11:43:20<2:12:35, 25.14it/s]global step 800000, trans_decision ep_re 493.2011781711846

{"global_step": 800000, "eval_re": [493.2011781711846, 493.2011781711846, 
493.2011781711846, 493.2011781711846, 493.2011781711846, 493.2011781711846, 
493.2011781711846, 493.2011781711846, 493.2011781711846, 493.2011781711846], 
"eval_len": [185, 185, 185, 185, 185, 185, 185, 185, 185, 185]}

 81%|████████  | 809999/1000000 [11:51:51<2:05:18, 25.27it/s]global step 810000, trans_decision ep_re 171.51999452330443

{"global_step": 810000, "eval_re": [171.51999452330446, 171.51999452330446, 
171.51999452330446, 171.51999452330446, 171.51999452330446, 171.51999452330446, 
171.51999452330446, 171.51999452330446, 171.51999452330446, 171.51999452330446],
"eval_len": [121, 121, 121, 121, 121, 121, 121, 121, 121, 121]}

 82%|████████▏ | 819999/1000000 [12:00:50<1:59:13, 25.16it/s]global step 820000, trans_decision ep_re 2005.3139144720576

{"global_step": 820000, "eval_re": [2005.3139144720574, 2005.3139144720574, 
2005.3139144720574, 2005.3139144720574, 2005.3139144720574, 2005.3139144720574, 
2005.3139144720574, 2005.3139144720574, 2005.3139144720574, 2005.3139144720574],
"eval_len": [504, 504, 504, 504, 504, 504, 504, 504, 504, 504]}

 83%|████████▎ | 829997/1000000 [12:09:50<1:51:55, 25.31it/s]global step 830000, trans_decision ep_re 1036.6970536400638

{"global_step": 830000, "eval_re": [1036.6970536400638, 1036.6970536400638, 
1036.6970536400638, 1036.6970536400638, 1036.6970536400638, 1036.6970536400638, 
1036.6970536400638, 1036.6970536400638, 1036.6970536400638, 1036.6970536400638],
"eval_len": [317, 317, 317, 317, 317, 317, 317, 317, 317, 317]}

 84%|████████▍ | 839999/1000000 [12:18:40<1:46:10, 25.12it/s]global step 840000, trans_decision ep_re 412.8067241705982

{"global_step": 840000, "eval_re": [412.8067241705982, 412.8067241705982, 
412.8067241705982, 412.8067241705982, 412.8067241705982, 412.8067241705982, 
412.8067241705982, 412.8067241705982, 412.8067241705982, 412.8067241705982], 
"eval_len": [185, 185, 185, 185, 185, 185, 185, 185, 185, 185]}

 85%|████████▍ | 849999/1000000 [12:27:30<1:39:18, 25.17it/s]global step 850000, trans_decision ep_re 696.0734303722404

{"global_step": 850000, "eval_re": [696.0734303722405, 696.0734303722405, 
696.0734303722405, 696.0734303722405, 696.0734303722405, 696.0734303722405, 
696.0734303722405, 696.0734303722405, 696.0734303722405, 696.0734303722405], 
"eval_len": [331, 331, 331, 331, 331, 331, 331, 331, 331, 331]}

 86%|████████▌ | 859999/1000000 [12:36:30<1:31:36, 25.47it/s]global step 860000, trans_decision ep_re 506.7531833387611

{"global_step": 860000, "eval_re": [506.7531833387611, 506.7531833387611, 
506.7531833387611, 506.7531833387611, 506.7531833387611, 506.7531833387611, 
506.7531833387611, 506.7531833387611, 506.7531833387611, 506.7531833387611], 
"eval_len": [190, 190, 190, 190, 190, 190, 190, 190, 190, 190]}

 87%|████████▋ | 869999/1000000 [12:45:20<1:26:12, 25.13it/s]global step 870000, trans_decision ep_re 532.49362300913

{"global_step": 870000, "eval_re": [532.49362300913, 532.49362300913, 
532.49362300913, 532.49362300913, 532.49362300913, 532.49362300913, 
532.49362300913, 532.49362300913, 532.49362300913, 532.49362300913], "eval_len":
[201, 201, 201, 201, 201, 201, 201, 201, 201, 201]}

 88%|████████▊ | 879999/1000000 [12:53:51<1:19:06, 25.28it/s]global step 880000, trans_decision ep_re 114.89801068440936

{"global_step": 880000, "eval_re": [114.89801068440936, 114.89801068440936, 
114.89801068440936, 114.89801068440936, 114.89801068440936, 114.89801068440936, 
114.89801068440936, 114.89801068440936, 114.89801068440936, 114.89801068440936],
"eval_len": [116, 116, 116, 116, 116, 116, 116, 116, 116, 116]}

 89%|████████▉ | 889999/1000000 [13:02:50<1:13:09, 25.06it/s]global step 890000, trans_decision ep_re 652.3406802962687

{"global_step": 890000, "eval_re": [652.3406802962686, 652.3406802962686, 
652.3406802962686, 652.3406802962686, 652.3406802962686, 652.3406802962686, 
652.3406802962686, 652.3406802962686, 652.3406802962686, 652.3406802962686], 
"eval_len": [229, 229, 229, 229, 229, 229, 229, 229, 229, 229]}

 90%|████████▉ | 899999/1000000 [13:11:40<1:06:05, 25.22it/s]global step 900000, trans_decision ep_re 764.0353780149543

{"global_step": 900000, "eval_re": [764.0353780149543, 764.0353780149543, 
764.0353780149543, 764.0353780149543, 764.0353780149543, 764.0353780149543, 
764.0353780149543, 764.0353780149543, 764.0353780149543, 764.0353780149543], 
"eval_len": [238, 238, 238, 238, 238, 238, 238, 238, 238, 238]}

 91%|█████████ | 909999/1000000 [13:20:30<59:21, 25.27it/s]global step 910000, trans_decision ep_re 655.6094003876277

{"global_step": 910000, "eval_re": [655.6094003876277, 655.6094003876277, 
655.6094003876277, 655.6094003876277, 655.6094003876277, 655.6094003876277, 
655.6094003876277, 655.6094003876277, 655.6094003876277, 655.6094003876277], 
"eval_len": [223, 223, 223, 223, 223, 223, 223, 223, 223, 223]}

 92%|█████████▏| 919999/1000000 [13:29:30<52:35, 25.35it/s]global step 920000, trans_decision ep_re 1044.309234723211

{"global_step": 920000, "eval_re": [1044.309234723211, 1044.309234723211, 
1044.309234723211, 1044.309234723211, 1044.309234723211, 1044.309234723211, 
1044.309234723211, 1044.309234723211, 1044.309234723211, 1044.309234723211], 
"eval_len": [306, 306, 306, 306, 306, 306, 306, 306, 306, 306]}

 93%|█████████▎| 929998/1000000 [13:38:20<46:08, 25.28it/s]global step 930000, trans_decision ep_re 560.1656329786936

{"global_step": 930000, "eval_re": [560.1656329786935, 560.1656329786935, 
560.1656329786935, 560.1656329786935, 560.1656329786935, 560.1656329786935, 
560.1656329786935, 560.1656329786935, 560.1656329786935, 560.1656329786935], 
"eval_len": [204, 204, 204, 204, 204, 204, 204, 204, 204, 204]}

 94%|█████████▍| 939997/1000000 [13:47:10<39:55, 25.05it/s]global step 940000, trans_decision ep_re 647.2878064774367

{"global_step": 940000, "eval_re": [647.2878064774367, 647.2878064774367, 
647.2878064774367, 647.2878064774367, 647.2878064774367, 647.2878064774367, 
647.2878064774367, 647.2878064774367, 647.2878064774367, 647.2878064774367], 
"eval_len": [223, 223, 223, 223, 223, 223, 223, 223, 223, 223]}

 95%|█████████▍| 949997/1000000 [13:56:00<32:48, 25.40it/s]global step 950000, trans_decision ep_re 557.6158462000778

{"global_step": 950000, "eval_re": [557.6158462000778, 557.6158462000778, 
557.6158462000778, 557.6158462000778, 557.6158462000778, 557.6158462000778, 
557.6158462000778, 557.6158462000778, 557.6158462000778, 557.6158462000778], 
"eval_len": [245, 245, 245, 245, 245, 245, 245, 245, 245, 245]}

 96%|█████████▌| 959997/1000000 [14:04:50<26:20, 25.32it/s]global step 960000, trans_decision ep_re 1191.388370533385

{"global_step": 960000, "eval_re": [1191.388370533385, 1191.388370533385, 
1191.388370533385, 1191.388370533385, 1191.388370533385, 1191.388370533385, 
1191.388370533385, 1191.388370533385, 1191.388370533385, 1191.388370533385], 
"eval_len": [325, 325, 325, 325, 325, 325, 325, 325, 325, 325]}

 97%|█████████▋| 969999/1000000 [14:13:50<20:01, 24.97it/s]global step 970000, trans_decision ep_re 703.2385894376096

{"global_step": 970000, "eval_re": [703.2385894376096, 703.2385894376096, 
703.2385894376096, 703.2385894376096, 703.2385894376096, 703.2385894376096, 
703.2385894376096, 703.2385894376096, 703.2385894376096, 703.2385894376096], 
"eval_len": [240, 240, 240, 240, 240, 240, 240, 240, 240, 240]}

 98%|█████████▊| 979998/1000000 [14:22:40<13:01, 25.60it/s]global step 980000, trans_decision ep_re 1899.246349436678

{"global_step": 980000, "eval_re": [1899.246349436678, 1899.246349436678, 
1899.246349436678, 1899.246349436678, 1899.246349436678, 1899.246349436678, 
1899.246349436678, 1899.246349436678, 1899.246349436678, 1899.246349436678], 
"eval_len": [469, 469, 469, 469, 469, 469, 469, 469, 469, 469]}

 99%|█████████▉| 989998/1000000 [14:31:40<06:33, 25.44it/s]global step 990000, trans_decision ep_re 1316.5618490500656

{"global_step": 990000, "eval_re": [1316.5618490500656, 1316.5618490500656, 
1316.5618490500656, 1316.5618490500656, 1316.5618490500656, 1316.5618490500656, 
1316.5618490500656, 1316.5618490500656, 1316.5618490500656, 1316.5618490500656],
"eval_len": [395, 395, 395, 395, 395, 395, 395, 395, 395, 395]}

100%|█████████▉| 999999/1000000 [14:40:30<00:00, 25.42it/s]global step 1000000, trans_decision ep_re 583.7205891175948

{"global_step": 1000000, "eval_re": [583.720589117595, 583.720589117595, 
583.720589117595, 583.720589117595, 583.720589117595, 583.720589117595, 
583.720589117595, 583.720589117595, 583.720589117595, 583.720589117595], 
"eval_len": [216, 216, 216, 216, 216, 216, 216, 216, 216, 216]}

100%|██████████| 1000000/1000000 [14:40:40<00:00, 18.92it/s]
