
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.0
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9999/1000000 [03:19<8:21:09, 32.92it/s]global step 10000, trans_decision ep_re 49.24973536817514

{"global_step": 10000, "eval_re": [49.24973536817513, 49.24973536817513, 
49.24973536817513, 49.24973536817513, 49.24973536817513, 49.24973536817513, 
49.24973536817513, 49.24973536817513, 49.24973536817513, 49.24973536817513], 
"eval_len": [38, 38, 38, 38, 38, 38, 38, 38, 38, 38]}

  2%|▏         | 19999/1000000 [09:50<8:16:44, 32.88it/s]global step 20000, trans_decision ep_re 218.32695808847308

{"global_step": 20000, "eval_re": [218.32695808847305, 218.32695808847305, 
218.32695808847305, 218.32695808847305, 218.32695808847305, 218.32695808847305, 
218.32695808847305, 218.32695808847305, 218.32695808847305, 218.32695808847305],
"eval_len": [117, 117, 117, 117, 117, 117, 117, 117, 117, 117]}

  3%|▎         | 29998/1000000 [16:40<8:12:08, 32.85it/s]global step 30000, trans_decision ep_re 856.8868503545798

{"global_step": 30000, "eval_re": [856.8868503545799, 856.8868503545799, 
856.8868503545799, 856.8868503545799, 856.8868503545799, 856.8868503545799, 
856.8868503545799, 856.8868503545799, 856.8868503545799, 856.8868503545799], 
"eval_len": [388, 388, 388, 388, 388, 388, 388, 388, 388, 388]}

  4%|▍         | 39997/1000000 [23:20<8:15:16, 32.31it/s]global step 40000, trans_decision ep_re 221.0437017398603

{"global_step": 40000, "eval_re": [221.0437017398603, 221.0437017398603, 
221.0437017398603, 221.0437017398603, 221.0437017398603, 221.0437017398603, 
221.0437017398603, 221.0437017398603, 221.0437017398603, 221.0437017398603], 
"eval_len": [138, 138, 138, 138, 138, 138, 138, 138, 138, 138]}

  5%|▍         | 49997/1000000 [30:00<8:01:54, 32.86it/s]global step 50000, trans_decision ep_re 1949.7714318044807

{"global_step": 50000, "eval_re": [1949.7714318044805, 1949.7714318044805, 
1949.7714318044805, 1949.7714318044805, 1949.7714318044805, 1949.7714318044805, 
1949.7714318044805, 1949.7714318044805, 1949.7714318044805, 1949.7714318044805],
"eval_len": [809, 809, 809, 809, 809, 809, 809, 809, 809, 809]}

  6%|▌         | 59997/1000000 [36:41<7:57:20, 32.82it/s]global step 60000, trans_decision ep_re 323.1281632700353

{"global_step": 60000, "eval_re": [323.1281632700353, 323.1281632700353, 
323.1281632700353, 323.1281632700353, 323.1281632700353, 323.1281632700353, 
323.1281632700353, 323.1281632700353, 323.1281632700353, 323.1281632700353], 
"eval_len": [156, 156, 156, 156, 156, 156, 156, 156, 156, 156]}

  7%|▋         | 69999/1000000 [43:30<7:51:37, 32.87it/s]global step 70000, trans_decision ep_re 1431.6759025844215

{"global_step": 70000, "eval_re": [1431.6759025844212, 1431.6759025844212, 
1431.6759025844212, 1431.6759025844212, 1431.6759025844212, 1431.6759025844212, 
1431.6759025844212, 1431.6759025844212, 1431.6759025844212, 1431.6759025844212],
"eval_len": [457, 457, 457, 457, 457, 457, 457, 457, 457, 457]}

  8%|▊         | 79997/1000000 [50:03<7:49:51, 32.63it/s]global step 80000, trans_decision ep_re 420.03302923601984

{"global_step": 80000, "eval_re": [420.0330292360199, 420.0330292360199, 
420.0330292360199, 420.0330292360199, 420.0330292360199, 420.0330292360199, 
420.0330292360199, 420.0330292360199, 420.0330292360199, 420.0330292360199], 
"eval_len": [185, 185, 185, 185, 185, 185, 185, 185, 185, 185]}

  9%|▉         | 89999/1000000 [57:00<7:40:54, 32.91it/s]global step 90000, trans_decision ep_re 3140.1240132671

{"global_step": 90000, "eval_re": [3140.1240132670996, 3140.1240132670996, 
3140.1240132670996, 3140.1240132670996, 3140.1240132670996, 3140.1240132670996, 
3140.1240132670996, 3140.1240132670996, 3140.1240132670996, 3140.1240132670996],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99997/1000000 [1:04:00<7:38:14, 32.73it/s]global step 100000, trans_decision ep_re 1306.8735713869426

{"global_step": 100000, "eval_re": [1401.9581748125004, 1401.9581748125004, 
1401.9581748125004, 1401.9581748125004, 1401.9581748125004, 451.11214055692307, 
1401.9581748125004, 1401.9581748125004, 1401.9581748125004, 1401.9581748125004],
"eval_len": [429, 429, 429, 429, 429, 187, 429, 429, 429, 429]}

 11%|█         | 109997/1000000 [1:10:40<7:28:24, 33.08it/s]global step 110000, trans_decision ep_re 704.0446329485378

{"global_step": 110000, "eval_re": [704.0446329485378, 704.0446329485378, 
704.0446329485378, 704.0446329485378, 704.0446329485378, 704.0446329485378, 
704.0446329485378, 704.0446329485378, 704.0446329485378, 704.0446329485378], 
"eval_len": [273, 273, 273, 273, 273, 273, 273, 273, 273, 273]}

 12%|█▏        | 119997/1000000 [1:17:20<7:29:23, 32.64it/s]global step 120000, trans_decision ep_re 3435.2487572062337

{"global_step": 120000, "eval_re": [3435.248757206234, 3435.248757206234, 
3435.248757206234, 3435.248757206234, 3435.248757206234, 3435.248757206234, 
3435.248757206234, 3435.248757206234, 3435.248757206234, 3435.248757206234], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129997/1000000 [1:24:30<7:22:07, 32.80it/s]global step 130000, trans_decision ep_re 3367.6029667270095

{"global_step": 130000, "eval_re": [3529.5722209561027, 3526.137750846233, 
3529.5722209561027, 1913.3141487750427, 3529.5722209561027, 3529.5722209561027, 
3529.5722209561027, 3529.5722209561027, 3529.5722209561027, 3529.5722209561027],
"eval_len": [1000, 1000, 1000, 578, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139997/1000000 [1:31:30<7:15:58, 32.88it/s]global step 140000, trans_decision ep_re 1276.5885634467124

{"global_step": 140000, "eval_re": [1331.9556337391118, 1331.9556337391118, 
1331.9556337391118, 1331.9556337391118, 1331.9556337391118, 1331.9556337391118, 
1331.9556337391118, 1331.9556337391118, 778.2849308151181, 1331.9556337391118], 
"eval_len": [402, 402, 402, 402, 402, 402, 402, 402, 273, 402]}

 15%|█▍        | 149997/1000000 [1:38:10<7:13:41, 32.67it/s]global step 150000, trans_decision ep_re 3500.1020927907275

{"global_step": 150000, "eval_re": [3500.102092790727, 3500.102092790727, 
3500.102092790727, 3500.102092790727, 3500.102092790727, 3500.102092790727, 
3500.102092790727, 3500.102092790727, 3500.102092790727, 3500.102092790727], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159997/1000000 [1:45:10<7:05:32, 32.90it/s]global step 160000, trans_decision ep_re 3465.7100558311695

{"global_step": 160000, "eval_re": [3465.7100558311695, 3465.7100558311695, 
3465.7100558311695, 3465.7100558311695, 3465.7100558311695, 3465.7100558311695, 
3465.7100558311695, 3465.7100558311695, 3465.7100558311695, 3465.7100558311695],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169997/1000000 [1:52:20<6:59:40, 32.96it/s]global step 170000, trans_decision ep_re 3317.0476284607876

{"global_step": 170000, "eval_re": [1504.6824950417385, 3518.4215321740153, 
3518.4215321740153, 3518.4215321740153, 3518.4215321740153, 3518.4215321740153, 
3518.4215321740153, 3518.4215321740153, 3518.4215321740153, 3518.4215321740153],
"eval_len": [425, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179997/1000000 [1:59:20<6:55:02, 32.93it/s]global step 180000, trans_decision ep_re 3557.356287730562

{"global_step": 180000, "eval_re": [3557.3562877305626, 3557.3562877305626, 
3557.3562877305626, 3557.3562877305626, 3557.3562877305626, 3557.3562877305626, 
3557.3562877305626, 3557.3562877305626, 3557.3562877305626, 3557.3562877305626],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189997/1000000 [2:06:20<6:56:36, 32.40it/s]global step 190000, trans_decision ep_re 3544.106337920296

{"global_step": 190000, "eval_re": [3544.106337920296, 3544.106337920296, 
3544.106337920296, 3544.106337920296, 3544.106337920296, 3544.106337920296, 
3544.106337920296, 3544.106337920296, 3544.106337920296, 3544.106337920296], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199997/1000000 [2:13:30<6:46:45, 32.78it/s]global step 200000, trans_decision ep_re 3141.054380596175

{"global_step": 200000, "eval_re": [3141.054380596175, 3141.054380596175, 
3141.054380596175, 3141.054380596175, 3141.054380596175, 3141.054380596175, 
3141.054380596175, 3141.054380596175, 3141.054380596175, 3141.054380596175], 
"eval_len": [894, 894, 894, 894, 894, 894, 894, 894, 894, 894]}

 21%|██        | 209997/1000000 [2:20:13<6:41:19, 32.81it/s]global step 210000, trans_decision ep_re 126.82268641548652

{"global_step": 210000, "eval_re": [126.82268641548652, 126.82268641548652, 
126.82268641548652, 126.82268641548652, 126.82268641548652, 126.82268641548652, 
126.82268641548652, 126.82268641548652, 126.82268641548652, 126.82268641548652],
"eval_len": [79, 79, 79, 79, 79, 79, 79, 79, 79, 79]}

 22%|██▏       | 219996/1000000 [2:27:00<6:44:56, 32.10it/s]global step 220000, trans_decision ep_re 3577.501618889365

{"global_step": 220000, "eval_re": [3577.5016188893646, 3577.5016188893646, 
3577.5016188893646, 3577.5016188893646, 3577.5016188893646, 3577.5016188893646, 
3577.5016188893646, 3577.5016188893646, 3577.5016188893646, 3577.5016188893646],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229997/1000000 [2:34:10<6:30:36, 32.85it/s]global step 230000, trans_decision ep_re 3516.4957456000434

{"global_step": 230000, "eval_re": [3516.4957456000434, 3516.4957456000434, 
3516.4957456000434, 3516.4957456000434, 3516.4957456000434, 3516.4957456000434, 
3516.4957456000434, 3516.4957456000434, 3516.4957456000434, 3516.4957456000434],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239997/1000000 [2:41:10<6:25:17, 32.88it/s]global step 240000, trans_decision ep_re 2652.2155492761985

{"global_step": 240000, "eval_re": [2652.2155492761985, 2652.2155492761985, 
2652.2155492761985, 2652.2155492761985, 2652.2155492761985, 2652.2155492761985, 
2652.2155492761985, 2652.2155492761985, 2652.2155492761985, 2652.2155492761985],
"eval_len": [727, 727, 727, 727, 727, 727, 727, 727, 727, 727]}

 25%|██▍       | 249997/1000000 [2:48:10<6:26:17, 32.36it/s]global step 250000, trans_decision ep_re 1568.5625967405854

{"global_step": 250000, "eval_re": [1568.5625967405854, 1568.5625967405854, 
1568.5625967405854, 1568.5625967405854, 1568.5625967405854, 1568.5625967405854, 
1568.5625967405854, 1568.5625967405854, 1568.5625967405854, 1568.5625967405854],
"eval_len": [466, 466, 466, 466, 466, 466, 466, 466, 466, 466]}

 26%|██▌       | 259997/1000000 [2:54:50<6:14:40, 32.92it/s]global step 260000, trans_decision ep_re 1083.3050641804027

{"global_step": 260000, "eval_re": [1083.333077336427, 1083.333077336427, 
1083.333077336427, 1083.333077336427, 1083.333077336427, 1083.0529457761834, 
1083.333077336427, 1083.333077336427, 1083.333077336427, 1083.333077336427], 
"eval_len": [338, 338, 338, 338, 338, 338, 338, 338, 338, 338]}

 27%|██▋       | 269997/1000000 [3:01:40<6:15:10, 32.43it/s]global step 270000, trans_decision ep_re 3330.318809988284

{"global_step": 270000, "eval_re": [3502.3197683015032, 1782.3101851693166, 
3502.3197683015032, 3502.3197683015032, 3502.3197683015032, 3502.3197683015032, 
3502.3197683015032, 3502.3197683015032, 3502.3197683015032, 3502.3197683015032],
"eval_len": [1000, 531, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279997/1000000 [3:08:40<6:04:49, 32.89it/s]global step 280000, trans_decision ep_re 3492.298769309077

{"global_step": 280000, "eval_re": [3492.2987693090768, 3492.2987693090768, 
3492.2987693090768, 3492.2987693090768, 3492.2987693090768, 3492.2987693090768, 
3492.2987693090768, 3492.2987693090768, 3492.2987693090768, 3492.2987693090768],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289997/1000000 [3:15:40<6:00:21, 32.84it/s]global step 290000, trans_decision ep_re 110.02647301018683

{"global_step": 290000, "eval_re": [110.02647301018682, 110.02647301018682, 
110.02647301018682, 110.02647301018682, 110.02647301018682, 110.02647301018682, 
110.02647301018682, 110.02647301018682, 110.02647301018682, 110.02647301018682],
"eval_len": [72, 72, 72, 72, 72, 72, 72, 72, 72, 72]}

 30%|██▉       | 299999/1000000 [3:22:20<5:55:52, 32.78it/s]global step 300000, trans_decision ep_re 3308.868135369935

{"global_step": 300000, "eval_re": [3493.965347737774, 3502.4536457713466, 
3493.965347737774, 3493.965347737774, 3493.965347737774, 3493.965347737774, 
3493.965347737774, 1634.5049260258152, 3493.965347737774, 3493.965347737774], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 500, 1000, 1000]}

 31%|███       | 309997/1000000 [3:29:06<5:49:01, 32.95it/s]global step 310000, trans_decision ep_re 180.41133555010288

{"global_step": 310000, "eval_re": [180.41133555010288, 180.41133555010288, 
180.41133555010288, 180.41133555010288, 180.41133555010288, 180.41133555010288, 
180.41133555010288, 180.41133555010288, 180.41133555010288, 180.41133555010288],
"eval_len": [98, 98, 98, 98, 98, 98, 98, 98, 98, 98]}

 32%|███▏      | 319996/1000000 [3:35:42<5:50:28, 32.34it/s]global step 320000, trans_decision ep_re 130.9571723828965

{"global_step": 320000, "eval_re": [130.9571723828965, 130.9571723828965, 
130.9571723828965, 130.9571723828965, 130.9571723828965, 130.9571723828965, 
130.9571723828965, 130.9571723828965, 130.9571723828965, 130.9571723828965], 
"eval_len": [80, 80, 80, 80, 80, 80, 80, 80, 80, 80]}

 33%|███▎      | 329997/1000000 [3:42:30<5:40:04, 32.84it/s]global step 330000, trans_decision ep_re 3512.8044434981784

{"global_step": 330000, "eval_re": [3512.8044434981784, 3512.8044434981784, 
3512.8044434981784, 3512.8044434981784, 3512.8044434981784, 3512.8044434981784, 
3512.8044434981784, 3512.8044434981784, 3512.8044434981784, 3512.8044434981784],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339997/1000000 [3:49:23<5:41:23, 32.22it/s]global step 340000, trans_decision ep_re 200.01411193752458

{"global_step": 340000, "eval_re": [200.01411193752458, 200.01411193752458, 
200.01411193752458, 200.01411193752458, 200.01411193752458, 200.01411193752458, 
200.01411193752458, 200.01411193752458, 200.01411193752458, 200.01411193752458],
"eval_len": [102, 102, 102, 102, 102, 102, 102, 102, 102, 102]}

 35%|███▍      | 349999/1000000 [3:56:10<5:30:35, 32.77it/s]global step 350000, trans_decision ep_re 3530.2976874663036

{"global_step": 350000, "eval_re": [3530.2976874663036, 3530.2976874663036, 
3530.2976874663036, 3530.2976874663036, 3530.2976874663036, 3530.2976874663036, 
3530.2976874663036, 3530.2976874663036, 3530.2976874663036, 3530.2976874663036],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359997/1000000 [4:03:20<5:24:24, 32.88it/s]global step 360000, trans_decision ep_re 1884.2973438129598

{"global_step": 360000, "eval_re": [1884.2973438129595, 1884.2973438129595, 
1884.2973438129595, 1884.2973438129595, 1884.2973438129595, 1884.2973438129595, 
1884.2973438129595, 1884.2973438129595, 1884.2973438129595, 1884.2973438129595],
"eval_len": [549, 549, 549, 549, 549, 549, 549, 549, 549, 549]}

 37%|███▋      | 369997/1000000 [4:10:10<5:19:39, 32.85it/s]global step 370000, trans_decision ep_re 3464.1625983717736

{"global_step": 370000, "eval_re": [3464.162598371773, 3464.162598371773, 
3464.162598371773, 3464.162598371773, 3464.162598371773, 3464.162598371773, 
3464.162598371773, 3464.162598371773, 3464.162598371773, 3464.162598371773], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379997/1000000 [4:17:10<5:14:51, 32.82it/s]global step 380000, trans_decision ep_re 184.87145019218866

{"global_step": 380000, "eval_re": [184.8714501921887, 184.8714501921887, 
184.8714501921887, 184.8714501921887, 184.8714501921887, 184.8714501921887, 
184.8714501921887, 184.8714501921887, 184.8714501921887, 184.8714501921887], 
"eval_len": [97, 97, 97, 97, 97, 97, 97, 97, 97, 97]}

 39%|███▉      | 389997/1000000 [4:23:50<5:15:38, 32.21it/s]global step 390000, trans_decision ep_re 1636.420937013599

{"global_step": 390000, "eval_re": [1636.4209370135989, 1636.4209370135989, 
1636.4209370135989, 1636.4209370135989, 1636.4209370135989, 1636.4209370135989, 
1636.4209370135989, 1636.4209370135989, 1636.4209370135989, 1636.4209370135989],
"eval_len": [444, 444, 444, 444, 444, 444, 444, 444, 444, 444]}

 40%|███▉      | 399997/1000000 [4:30:40<5:04:14, 32.87it/s]global step 400000, trans_decision ep_re 3529.693843685942

{"global_step": 400000, "eval_re": [3529.693843685942, 3529.693843685942, 
3529.693843685942, 3529.693843685942, 3529.693843685942, 3529.693843685942, 
3529.693843685942, 3529.693843685942, 3529.693843685942, 3529.693843685942], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409997/1000000 [4:37:40<4:59:26, 32.84it/s]global step 410000, trans_decision ep_re 3224.107079297214

{"global_step": 410000, "eval_re": [3138.588503591196, 3138.588503591196, 
3138.588503591196, 3592.081988512313, 3610.4495531305342, 3068.419726190924, 
3138.588503591196, 3138.588503591196, 3138.588503591196, 3138.588503591196], 
"eval_len": [850, 850, 850, 1000, 1000, 832, 850, 850, 850, 850]}

 42%|████▏     | 419997/1000000 [4:44:40<4:54:04, 32.87it/s]global step 420000, trans_decision ep_re 1851.0712681802554

{"global_step": 420000, "eval_re": [1851.0712681802556, 1851.0712681802556, 
1851.0712681802556, 1851.0712681802556, 1851.0712681802556, 1851.0712681802556, 
1851.0712681802556, 1851.0712681802556, 1851.0712681802556, 1851.0712681802556],
"eval_len": [520, 520, 520, 520, 520, 520, 520, 520, 520, 520]}

 43%|████▎     | 429997/1000000 [4:51:30<4:50:09, 32.74it/s]global step 430000, trans_decision ep_re 1092.484938870773

{"global_step": 430000, "eval_re": [1092.4849388707728, 1092.4849388707728, 
1092.4849388707728, 1092.4849388707728, 1092.4849388707728, 1092.4849388707728, 
1092.4849388707728, 1092.4849388707728, 1092.4849388707728, 1092.4849388707728],
"eval_len": [322, 322, 322, 322, 322, 322, 322, 322, 322, 322]}

 44%|████▍     | 439997/1000000 [4:58:04<4:50:54, 32.08it/s]global step 440000, trans_decision ep_re 460.92049594920445

{"global_step": 440000, "eval_re": [460.92049594920445, 460.92049594920445, 
460.92049594920445, 460.92049594920445, 460.92049594920445, 460.92049594920445, 
460.92049594920445, 460.92049594920445, 460.92049594920445, 460.92049594920445],
"eval_len": [180, 180, 180, 180, 180, 180, 180, 180, 180, 180]}

 45%|████▍     | 449999/1000000 [5:05:00<4:37:49, 33.00it/s]global step 450000, trans_decision ep_re 1713.2965652430364

{"global_step": 450000, "eval_re": [1712.5328195451386, 1712.5328195451386, 
1712.5328195451386, 1712.5328195451386, 1712.5328195451386, 1712.552825121382, 
1712.5328195451386, 1712.5328195451386, 1712.5328195451386, 1720.1502709478732],
"eval_len": [506, 506, 506, 506, 506, 506, 506, 506, 506, 506]}

 46%|████▌     | 459997/1000000 [5:11:50<4:36:55, 32.50it/s]global step 460000, trans_decision ep_re 1802.4462424415742

{"global_step": 460000, "eval_re": [1802.4462424415742, 1802.4462424415742, 
1802.4462424415742, 1802.4462424415742, 1802.4462424415742, 1802.4462424415742, 
1802.4462424415742, 1802.4462424415742, 1802.4462424415742, 1802.4462424415742],
"eval_len": [507, 507, 507, 507, 507, 507, 507, 507, 507, 507]}

 47%|████▋     | 469997/1000000 [5:18:40<4:28:51, 32.85it/s]global step 470000, trans_decision ep_re 2458.7172399643714

{"global_step": 470000, "eval_re": [2458.7172399643714, 2458.7172399643714, 
2458.7172399643714, 2458.7172399643714, 2458.7172399643714, 2458.7172399643714, 
2458.7172399643714, 2458.7172399643714, 2458.7172399643714, 2458.7172399643714],
"eval_len": [678, 678, 678, 678, 678, 678, 678, 678, 678, 678]}

 48%|████▊     | 479997/1000000 [5:25:30<4:26:28, 32.52it/s]global step 480000, trans_decision ep_re 1802.6421542805788

{"global_step": 480000, "eval_re": [1802.6421542805788, 1802.6421542805788, 
1802.6421542805788, 1802.6421542805788, 1802.6421542805788, 1802.6421542805788, 
1802.6421542805788, 1802.6421542805788, 1802.6421542805788, 1802.6421542805788],
"eval_len": [512, 512, 512, 512, 512, 512, 512, 512, 512, 512]}

 49%|████▉     | 489997/1000000 [5:32:20<4:18:26, 32.89it/s]global step 490000, trans_decision ep_re 1498.953657190687

{"global_step": 490000, "eval_re": [1498.9536571906872, 1498.9536571906872, 
1498.9536571906872, 1498.9536571906872, 1498.9536571906872, 1498.9536571906872, 
1498.9536571906872, 1498.9536571906872, 1498.9536571906872, 1498.9536571906872],
"eval_len": [426, 426, 426, 426, 426, 426, 426, 426, 426, 426]}

 50%|████▉     | 499997/1000000 [5:38:53<4:18:20, 32.26it/s]global step 500000, trans_decision ep_re 121.92067327554128

{"global_step": 500000, "eval_re": [121.92067327554128, 121.92067327554128, 
121.92067327554128, 121.92067327554128, 121.92067327554128, 121.92067327554128, 
121.92067327554128, 121.92067327554128, 121.92067327554128, 121.92067327554128],
"eval_len": [77, 77, 77, 77, 77, 77, 77, 77, 77, 77]}

 51%|█████     | 509996/1000000 [5:45:40<4:08:20, 32.88it/s]global step 510000, trans_decision ep_re 2108.557926078316

{"global_step": 510000, "eval_re": [2025.3484198902256, 2025.3484198902256, 
2025.3484198902256, 2857.443481771128, 2025.3484198902256, 2025.3484198902256, 
2025.3484198902256, 2025.3484198902256, 2025.3484198902256, 2025.3484198902256],
"eval_len": [599, 599, 599, 791, 599, 599, 599, 599, 599, 599]}

 52%|█████▏    | 519997/1000000 [5:52:40<4:04:46, 32.68it/s]global step 520000, trans_decision ep_re 3621.7236633894054

{"global_step": 520000, "eval_re": [3621.723663389406, 3621.723663389406, 
3621.723663389406, 3621.723663389406, 3621.723663389406, 3621.723663389406, 
3621.723663389406, 3621.723663389406, 3621.723663389406, 3621.723663389406], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529997/1000000 [5:59:40<3:58:37, 32.83it/s]global step 530000, trans_decision ep_re 2024.0598170843998

{"global_step": 530000, "eval_re": [2024.0598170843998, 2024.0598170843998, 
2024.0598170843998, 2024.0598170843998, 2024.0598170843998, 2024.0598170843998, 
2024.0598170843998, 2024.0598170843998, 2024.0598170843998, 2024.0598170843998],
"eval_len": [561, 561, 561, 561, 561, 561, 561, 561, 561, 561]}

 54%|█████▍    | 539997/1000000 [6:06:30<3:52:38, 32.95it/s]global step 540000, trans_decision ep_re 1376.7530093296732

{"global_step": 540000, "eval_re": [1376.753009329673, 1376.753009329673, 
1376.753009329673, 1376.753009329673, 1376.753009329673, 1376.753009329673, 
1376.753009329673, 1376.753009329673, 1376.753009329673, 1376.753009329673], 
"eval_len": [381, 381, 381, 381, 381, 381, 381, 381, 381, 381]}

 55%|█████▍    | 549997/1000000 [6:13:10<3:48:27, 32.83it/s]global step 550000, trans_decision ep_re 1075.0302235864056

{"global_step": 550000, "eval_re": [1075.0302235864056, 1075.0302235864056, 
1075.0302235864056, 1075.0302235864056, 1075.0302235864056, 1075.0302235864056, 
1075.0302235864056, 1075.0302235864056, 1075.0302235864056, 1075.0302235864056],
"eval_len": [307, 307, 307, 307, 307, 307, 307, 307, 307, 307]}

 56%|█████▌    | 559997/1000000 [6:19:50<3:43:04, 32.87it/s]global step 560000, trans_decision ep_re 2254.8605686505534

{"global_step": 560000, "eval_re": [2254.8605686505534, 2254.8605686505534, 
2254.8605686505534, 2254.8605686505534, 2254.8605686505534, 2254.8605686505534, 
2254.8605686505534, 2254.8605686505534, 2254.8605686505534, 2254.8605686505534],
"eval_len": [604, 604, 604, 604, 604, 604, 604, 604, 604, 604]}

 57%|█████▋    | 569997/1000000 [6:26:40<3:36:42, 33.07it/s]global step 570000, trans_decision ep_re 2097.885126604525

{"global_step": 570000, "eval_re": [2103.909139864291, 2103.909139864291, 
2103.909139864291, 2103.909139864291, 2103.909139864291, 2043.6690072666293, 
2103.909139864291, 2103.909139864291, 2103.909139864291, 2103.909139864291], 
"eval_len": [556, 556, 556, 556, 556, 547, 556, 556, 556, 556]}

 58%|█████▊    | 579997/1000000 [6:33:30<3:31:52, 33.04it/s]global step 580000, trans_decision ep_re 1783.5822990282709

{"global_step": 580000, "eval_re": [1576.7856427679123, 1576.7856427679123, 
1576.7856427679123, 1576.7856427679123, 3644.7522053714956, 1576.7856427679123, 
1576.7856427679123, 1576.7856427679123, 1576.7856427679123, 1576.7856427679123],
"eval_len": [454, 454, 454, 454, 1000, 454, 454, 454, 454, 454]}

 59%|█████▉    | 589999/1000000 [6:40:10<3:25:57, 33.18it/s]global step 590000, trans_decision ep_re 1212.5867271334469

{"global_step": 590000, "eval_re": [1212.5867271334469, 1212.5867271334469, 
1212.5867271334469, 1212.5867271334469, 1212.5867271334469, 1212.5867271334469, 
1212.5867271334469, 1212.5867271334469, 1212.5867271334469, 1212.5867271334469],
"eval_len": [349, 349, 349, 349, 349, 349, 349, 349, 349, 349]}

 60%|█████▉    | 599997/1000000 [6:46:50<3:21:54, 33.02it/s]global step 600000, trans_decision ep_re 1639.8532141288895

{"global_step": 600000, "eval_re": [1734.8845816789203, 1734.8845816789203, 
784.5709061786139, 1734.8845816789203, 1734.8845816789203, 1734.8845816789203, 
1734.8845816789203, 1734.8845816789203, 1734.8845816789203, 1734.8845816789203],
"eval_len": [466, 466, 262, 466, 466, 466, 466, 466, 466, 466]}

 61%|██████    | 609997/1000000 [6:53:40<3:16:45, 33.04it/s]global step 610000, trans_decision ep_re 1267.7465334056667

{"global_step": 610000, "eval_re": [1267.7465334056667, 1267.7465334056667, 
1267.7465334056667, 1267.7465334056667, 1267.7465334056667, 1267.7465334056667, 
1267.7465334056667, 1267.7465334056667, 1267.7465334056667, 1267.7465334056667],
"eval_len": [347, 347, 347, 347, 347, 347, 347, 347, 347, 347]}

 62%|██████▏   | 619999/1000000 [7:00:20<3:12:23, 32.92it/s]global step 620000, trans_decision ep_re 2382.9720063998693

{"global_step": 620000, "eval_re": [2382.9720063998693, 2382.9720063998693, 
2382.9720063998693, 2382.9720063998693, 2382.9720063998693, 2382.9720063998693, 
2382.9720063998693, 2382.9720063998693, 2382.9720063998693, 2382.9720063998693],
"eval_len": [635, 635, 635, 635, 635, 635, 635, 635, 635, 635]}

 63%|██████▎   | 629997/1000000 [7:07:10<3:05:07, 33.31it/s]global step 630000, trans_decision ep_re 1646.4767543033417

{"global_step": 630000, "eval_re": [1646.4767543033415, 1646.4767543033415, 
1646.4767543033415, 1646.4767543033415, 1646.4767543033415, 1646.4767543033415, 
1646.4767543033415, 1646.4767543033415, 1646.4767543033415, 1646.4767543033415],
"eval_len": [441, 441, 441, 441, 441, 441, 441, 441, 441, 441]}

 64%|██████▍   | 639997/1000000 [7:13:34<3:02:00, 32.97it/s]global step 640000, trans_decision ep_re 590.3202042367112

{"global_step": 640000, "eval_re": [590.3202042367112, 590.3202042367112, 
590.3202042367112, 590.3202042367112, 590.3202042367112, 590.3202042367112, 
590.3202042367112, 590.3202042367112, 590.3202042367112, 590.3202042367112], 
"eval_len": [204, 204, 204, 204, 204, 204, 204, 204, 204, 204]}

 65%|██████▍   | 649996/1000000 [7:20:30<2:57:07, 32.93it/s]global step 650000, trans_decision ep_re 1867.891504591878

{"global_step": 650000, "eval_re": [1867.8915045918784, 1867.8915045918784, 
1867.8915045918784, 1867.8915045918784, 1867.8915045918784, 1867.8915045918784, 
1867.8915045918784, 1867.8915045918784, 1867.8915045918784, 1867.8915045918784],
"eval_len": [507, 507, 507, 507, 507, 507, 507, 507, 507, 507]}

 66%|██████▌   | 659997/1000000 [7:26:55<2:51:37, 33.02it/s]global step 660000, trans_decision ep_re 160.17168974168266

{"global_step": 660000, "eval_re": [160.17168974168263, 160.17168974168263, 
160.17168974168263, 160.17168974168263, 160.17168974168263, 160.17168974168263, 
160.17168974168263, 160.17168974168263, 160.17168974168263, 160.17168974168263],
"eval_len": [87, 87, 87, 87, 87, 87, 87, 87, 87, 87]}

 67%|██████▋   | 669996/1000000 [7:33:40<2:46:07, 33.11it/s]global step 670000, trans_decision ep_re 1185.780458911438

{"global_step": 670000, "eval_re": [1185.780458911438, 1185.780458911438, 
1185.780458911438, 1185.780458911438, 1185.780458911438, 1185.780458911438, 
1185.780458911438, 1185.780458911438, 1185.780458911438, 1185.780458911438], 
"eval_len": [323, 323, 323, 323, 323, 323, 323, 323, 323, 323]}

 68%|██████▊   | 679997/1000000 [7:40:20<2:41:55, 32.94it/s]global step 680000, trans_decision ep_re 743.4414870670041

{"global_step": 680000, "eval_re": [743.441487067004, 743.441487067004, 
743.441487067004, 743.441487067004, 743.441487067004, 743.441487067004, 
743.441487067004, 743.441487067004, 743.441487067004, 743.441487067004], 
"eval_len": [241, 241, 241, 241, 241, 241, 241, 241, 241, 241]}

 69%|██████▉   | 689997/1000000 [7:46:47<2:36:48, 32.95it/s]global step 690000, trans_decision ep_re 198.33477956027426

{"global_step": 690000, "eval_re": [198.33477956027426, 198.33477956027426, 
198.33477956027426, 198.33477956027426, 198.33477956027426, 198.33477956027426, 
198.33477956027426, 198.33477956027426, 198.33477956027426, 198.33477956027426],
"eval_len": [100, 100, 100, 100, 100, 100, 100, 100, 100, 100]}

 70%|██████▉   | 699996/1000000 [7:53:20<2:31:56, 32.91it/s]global step 700000, trans_decision ep_re 33.90842386764621

{"global_step": 700000, "eval_re": [33.90842386764621, 33.90842386764621, 
33.90842386764621, 33.90842386764621, 33.90842386764621, 33.90842386764621, 
33.90842386764621, 33.90842386764621, 33.90842386764621, 33.90842386764621], 
"eval_len": [35, 35, 35, 35, 35, 35, 35, 35, 35, 35]}

 71%|███████   | 709996/1000000 [8:00:10<2:27:06, 32.85it/s]global step 710000, trans_decision ep_re 1862.7088655969299

{"global_step": 710000, "eval_re": [1862.7088655969299, 1862.7088655969299, 
1862.7088655969299, 1862.7088655969299, 1862.7088655969299, 1862.7088655969299, 
1862.7088655969299, 1862.7088655969299, 1862.7088655969299, 1862.7088655969299],
"eval_len": [490, 490, 490, 490, 490, 490, 490, 490, 490, 490]}

 72%|███████▏  | 719998/1000000 [8:06:50<2:21:46, 32.92it/s]global step 720000, trans_decision ep_re 926.5616534180105

{"global_step": 720000, "eval_re": [926.5616534180105, 926.5616534180105, 
926.5616534180105, 926.5616534180105, 926.5616534180105, 926.5616534180105, 
926.5616534180105, 926.5616534180105, 926.5616534180105, 926.5616534180105], 
"eval_len": [262, 262, 262, 262, 262, 262, 262, 262, 262, 262]}

 73%|███████▎  | 729998/1000000 [8:13:30<2:17:05, 32.82it/s]global step 730000, trans_decision ep_re 3797.3275142329994

{"global_step": 730000, "eval_re": [3797.327514232999, 3797.327514232999, 
3797.327514232999, 3797.327514232999, 3797.327514232999, 3797.327514232999, 
3797.327514232999, 3797.327514232999, 3797.327514232999, 3797.327514232999], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739997/1000000 [8:20:30<2:11:56, 32.84it/s]global step 740000, trans_decision ep_re 1218.551806626805

{"global_step": 740000, "eval_re": [1218.551806626805, 1218.551806626805, 
1218.551806626805, 1218.551806626805, 1218.551806626805, 1218.551806626805, 
1218.551806626805, 1218.551806626805, 1218.551806626805, 1218.551806626805], 
"eval_len": [335, 335, 335, 335, 335, 335, 335, 335, 335, 335]}

 75%|███████▍  | 749997/1000000 [8:27:10<2:06:28, 32.95it/s]global step 750000, trans_decision ep_re 886.8840345862342

{"global_step": 750000, "eval_re": [904.0154718887994, 904.0154718887994, 
904.0154718887994, 904.0154718887994, 904.0154718887994, 732.7010988631462, 
904.0154718887994, 904.0154718887994, 904.0154718887994, 904.0154718887994], 
"eval_len": [271, 271, 271, 271, 271, 241, 271, 271, 271, 271]}

 76%|███████▌  | 759997/1000000 [8:33:50<2:00:36, 33.16it/s]global step 760000, trans_decision ep_re 1252.9120464130167

{"global_step": 760000, "eval_re": [1252.9120464130165, 1252.9120464130165, 
1252.9120464130165, 1252.9120464130165, 1252.9120464130165, 1252.9120464130165, 
1252.9120464130165, 1252.9120464130165, 1252.9120464130165, 1252.9120464130165],
"eval_len": [351, 351, 351, 351, 351, 351, 351, 351, 351, 351]}

 77%|███████▋  | 769997/1000000 [8:40:30<1:56:05, 33.02it/s]global step 770000, trans_decision ep_re 1266.9067427361529

{"global_step": 770000, "eval_re": [1266.9067427361529, 1266.9067427361529, 
1266.9067427361529, 1266.9067427361529, 1266.9067427361529, 1266.9067427361529, 
1266.9067427361529, 1266.9067427361529, 1266.9067427361529, 1266.9067427361529],
"eval_len": [374, 374, 374, 374, 374, 374, 374, 374, 374, 374]}

 78%|███████▊  | 779997/1000000 [8:47:10<1:50:59, 33.03it/s]global step 780000, trans_decision ep_re 1591.1794205936653

{"global_step": 780000, "eval_re": [1591.179420593665, 1591.179420593665, 
1591.179420593665, 1591.179420593665, 1591.179420593665, 1591.179420593665, 
1591.179420593665, 1591.179420593665, 1591.179420593665, 1591.179420593665], 
"eval_len": [408, 408, 408, 408, 408, 408, 408, 408, 408, 408]}

 79%|███████▉  | 789997/1000000 [8:53:50<1:46:26, 32.88it/s]global step 790000, trans_decision ep_re 781.8053245123663

{"global_step": 790000, "eval_re": [781.8053245123662, 781.8053245123662, 
781.8053245123662, 781.8053245123662, 781.8053245123662, 781.8053245123662, 
781.8053245123662, 781.8053245123662, 781.8053245123662, 781.8053245123662], 
"eval_len": [259, 259, 259, 259, 259, 259, 259, 259, 259, 259]}

 80%|███████▉  | 799998/1000000 [9:00:10<1:39:33, 33.48it/s]global step 800000, trans_decision ep_re 1372.5317091101842

{"global_step": 800000, "eval_re": [1372.5317091101842, 1372.5317091101842, 
1372.5317091101842, 1372.5317091101842, 1372.5317091101842, 1372.5317091101842, 
1372.5317091101842, 1372.5317091101842, 1372.5317091101842, 1372.5317091101842],
"eval_len": [364, 364, 364, 364, 364, 364, 364, 364, 364, 364]}

 81%|████████  | 809997/1000000 [9:07:00<1:36:15, 32.90it/s]global step 810000, trans_decision ep_re 623.2273985316617

{"global_step": 810000, "eval_re": [620.6047070132362, 620.6047070132362, 
620.6047070132362, 620.6047070132362, 620.6047070132362, 646.8316221974909, 
620.6047070132362, 620.6047070132362, 620.6047070132362, 620.6047070132362], 
"eval_len": [214, 214, 214, 214, 214, 220, 214, 214, 214, 214]}

 82%|████████▏ | 819997/1000000 [9:13:25<1:31:08, 32.92it/s]global step 820000, trans_decision ep_re 634.7824914726785

{"global_step": 820000, "eval_re": [634.7824914726784, 634.7824914726784, 
634.7824914726784, 634.7824914726784, 634.7824914726784, 634.7824914726784, 
634.7824914726784, 634.7824914726784, 634.7824914726784, 634.7824914726784], 
"eval_len": [217, 217, 217, 217, 217, 217, 217, 217, 217, 217]}

 83%|████████▎ | 829999/1000000 [9:20:20<1:26:32, 32.74it/s]global step 830000, trans_decision ep_re 2258.61558981127

{"global_step": 830000, "eval_re": [2258.61558981127, 2258.61558981127, 
2258.61558981127, 2258.61558981127, 2258.61558981127, 2258.61558981127, 
2258.61558981127, 2258.61558981127, 2258.61558981127, 2258.61558981127], 
"eval_len": [590, 590, 590, 590, 590, 590, 590, 590, 590, 590]}

 84%|████████▍ | 839997/1000000 [9:27:10<1:20:29, 33.13it/s]global step 840000, trans_decision ep_re 961.0765286014345

{"global_step": 840000, "eval_re": [962.460620830251, 962.460620830251, 
962.460620830251, 962.460620830251, 962.460620830251, 962.460620830251, 
962.460620830251, 962.460620830251, 948.6196985420873, 962.460620830251], 
"eval_len": [286, 286, 286, 286, 286, 286, 286, 286, 283, 286]}

 85%|████████▍ | 849997/1000000 [9:33:33<1:15:40, 33.04it/s]global step 850000, trans_decision ep_re 403.25858451409795

{"global_step": 850000, "eval_re": [403.25858451409795, 403.25858451409795, 
403.25858451409795, 403.25858451409795, 403.25858451409795, 403.25858451409795, 
403.25858451409795, 403.25858451409795, 403.25858451409795, 403.25858451409795],
"eval_len": [160, 160, 160, 160, 160, 160, 160, 160, 160, 160]}

 86%|████████▌ | 859996/1000000 [9:40:20<1:11:12, 32.77it/s]global step 860000, trans_decision ep_re 1339.0162995439716

{"global_step": 860000, "eval_re": [1339.0162995439716, 1339.0162995439716, 
1339.0162995439716, 1339.0162995439716, 1339.0162995439716, 1339.0162995439716, 
1339.0162995439716, 1339.0162995439716, 1339.0162995439716, 1339.0162995439716],
"eval_len": [359, 359, 359, 359, 359, 359, 359, 359, 359, 359]}

 87%|████████▋ | 869998/1000000 [9:47:00<1:05:49, 32.91it/s]global step 870000, trans_decision ep_re 1313.6696887142712

{"global_step": 870000, "eval_re": [1313.669688714271, 1313.669688714271, 
1313.669688714271, 1313.669688714271, 1313.669688714271, 1313.669688714271, 
1313.669688714271, 1313.669688714271, 1313.669688714271, 1313.669688714271], 
"eval_len": [349, 349, 349, 349, 349, 349, 349, 349, 349, 349]}

 88%|████████▊ | 879997/1000000 [9:53:23<1:00:09, 33.24it/s]global step 880000, trans_decision ep_re 270.50949035194924

{"global_step": 880000, "eval_re": [270.5094903519492, 270.5094903519492, 
270.5094903519492, 270.5094903519492, 270.5094903519492, 270.5094903519492, 
270.5094903519492, 270.5094903519492, 270.5094903519492, 270.5094903519492], 
"eval_len": [121, 121, 121, 121, 121, 121, 121, 121, 121, 121]}

 89%|████████▉ | 889996/1000000 [10:00:10<55:35, 32.98it/s]global step 890000, trans_decision ep_re 1722.8022696119904

{"global_step": 890000, "eval_re": [1722.8022696119904, 1722.8022696119904, 
1722.8022696119904, 1722.8022696119904, 1722.8022696119904, 1722.8022696119904, 
1722.8022696119904, 1722.8022696119904, 1722.8022696119904, 1722.8022696119904],
"eval_len": [427, 427, 427, 427, 427, 427, 427, 427, 427, 427]}

 90%|████████▉ | 899997/1000000 [10:06:35<50:13, 33.18it/s]global step 900000, trans_decision ep_re 42.18227032662562

{"global_step": 900000, "eval_re": [42.18227032662563, 42.18227032662563, 
42.18227032662563, 42.18227032662563, 42.18227032662563, 42.18227032662563, 
42.18227032662563, 42.18227032662563, 42.18227032662563, 42.18227032662563], 
"eval_len": [40, 40, 40, 40, 40, 40, 40, 40, 40, 40]}

 91%|█████████ | 909997/1000000 [10:13:20<44:58, 33.35it/s]global step 910000, trans_decision ep_re 1600.3099769802907

{"global_step": 910000, "eval_re": [1600.309976980291, 1600.309976980291, 
1600.309976980291, 1600.309976980291, 1600.309976980291, 1600.309976980291, 
1600.309976980291, 1600.309976980291, 1600.309976980291, 1600.309976980291], 
"eval_len": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400]}

 92%|█████████▏| 919999/1000000 [10:19:44<40:04, 33.27it/s]global step 920000, trans_decision ep_re 818.8133491570204

{"global_step": 920000, "eval_re": [818.8133491570204, 818.8133491570204, 
818.8133491570204, 818.8133491570204, 818.8133491570204, 818.8133491570204, 
818.8133491570204, 818.8133491570204, 818.8133491570204, 818.8133491570204], 
"eval_len": [245, 245, 245, 245, 245, 245, 245, 245, 245, 245]}

 93%|█████████▎| 929998/1000000 [10:26:30<35:13, 33.12it/s]global step 930000, trans_decision ep_re 1405.7158653038136

{"global_step": 930000, "eval_re": [1392.1991441323926, 1392.1991441323926, 
1392.1991441323926, 1392.1991441323926, 1527.3663558466046, 1392.1991441323926, 
1392.1991441323926, 1392.1991441323926, 1392.1991441323926, 1392.1991441323926],
"eval_len": [362, 362, 362, 362, 386, 362, 362, 362, 362, 362]}

 94%|█████████▍| 939997/1000000 [10:33:10<29:53, 33.46it/s]global step 940000, trans_decision ep_re 1677.190048080037

{"global_step": 940000, "eval_re": [1677.190048080037, 1677.190048080037, 
1677.190048080037, 1677.190048080037, 1677.190048080037, 1677.190048080037, 
1677.190048080037, 1677.190048080037, 1677.190048080037, 1677.190048080037], 
"eval_len": [421, 421, 421, 421, 421, 421, 421, 421, 421, 421]}

 95%|█████████▍| 949999/1000000 [10:39:50<25:15, 32.99it/s]global step 950000, trans_decision ep_re 1158.0513312017724

{"global_step": 950000, "eval_re": [1170.004123890817, 1170.004123890817, 
1050.476197000373, 1170.004123890817, 1170.004123890817, 1170.004123890817, 
1170.004123890817, 1170.004123890817, 1170.004123890817, 1170.004123890817], 
"eval_len": [298, 298, 276, 298, 298, 298, 298, 298, 298, 298]}

 96%|█████████▌| 959997/1000000 [10:46:14<19:51, 33.57it/s]global step 960000, trans_decision ep_re 571.7988902771104

{"global_step": 960000, "eval_re": [571.7988902771104, 571.7988902771104, 
571.7988902771104, 571.7988902771104, 571.7988902771104, 571.7988902771104, 
571.7988902771104, 571.7988902771104, 571.7988902771104, 571.7988902771104], 
"eval_len": [197, 197, 197, 197, 197, 197, 197, 197, 197, 197]}

 97%|█████████▋| 969996/1000000 [10:53:00<15:01, 33.30it/s]global step 970000, trans_decision ep_re 1568.2241141753188

{"global_step": 970000, "eval_re": [1568.2241141753188, 1568.2241141753188, 
1568.2241141753188, 1568.2241141753188, 1568.2241141753188, 1568.2241141753188, 
1568.2241141753188, 1568.2241141753188, 1568.2241141753188, 1568.2241141753188],
"eval_len": [383, 383, 383, 383, 383, 383, 383, 383, 383, 383]}

 98%|█████████▊| 979999/1000000 [10:59:40<10:02, 33.21it/s]global step 980000, trans_decision ep_re 1187.8222266087776

{"global_step": 980000, "eval_re": [1187.8222266087776, 1187.8222266087776, 
1187.8222266087776, 1187.8222266087776, 1187.8222266087776, 1187.8222266087776, 
1187.8222266087776, 1187.8222266087776, 1187.8222266087776, 1187.8222266087776],
"eval_len": [316, 316, 316, 316, 316, 316, 316, 316, 316, 316]}

 99%|█████████▉| 989999/1000000 [11:06:20<05:01, 33.12it/s]global step 990000, trans_decision ep_re 1634.385069521939

{"global_step": 990000, "eval_re": [1634.385069521939, 1634.385069521939, 
1634.385069521939, 1634.385069521939, 1634.385069521939, 1634.385069521939, 
1634.385069521939, 1634.385069521939, 1634.385069521939, 1634.385069521939], 
"eval_len": [418, 418, 418, 418, 418, 418, 418, 418, 418, 418]}

100%|█████████▉| 999997/1000000 [11:13:00<00:00, 33.14it/s]global step 1000000, trans_decision ep_re 945.0708699507446

{"global_step": 1000000, "eval_re": [945.0708699507445, 945.0708699507445, 
945.0708699507445, 945.0708699507445, 945.0708699507445, 945.0708699507445, 
945.0708699507445, 945.0708699507445, 945.0708699507445, 945.0708699507445], 
"eval_len": [255, 255, 255, 255, 255, 255, 255, 255, 255, 255]}

100%|██████████| 1000000/1000000 [11:13:01<00:00, 24.76it/s]
