
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.0
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9999/1000000 [03:50<8:55:39, 30.80it/s]global step 10000, trans_decision ep_re 836.4575941941657

{"global_step": 10000, "eval_re": [836.4575941941657, 836.4575941941657, 
836.4575941941657, 836.4575941941657, 836.4575941941657, 836.4575941941657, 
836.4575941941657, 836.4575941941657, 836.4575941941657, 836.4575941941657], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19997/1000000 [11:20<8:57:16, 30.40it/s]global step 20000, trans_decision ep_re 952.5114852069025

{"global_step": 20000, "eval_re": [952.5114852069028, 952.5114852069028, 
952.5114852069028, 952.5114852069028, 952.5114852069028, 952.5114852069028, 
952.5114852069028, 952.5114852069028, 952.5114852069028, 952.5114852069028], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29997/1000000 [18:50<8:40:09, 31.08it/s]global step 30000, trans_decision ep_re 915.0187570880134

{"global_step": 30000, "eval_re": [915.0187570880134, 915.0187570880134, 
915.0187570880134, 915.0187570880134, 915.0187570880134, 915.0187570880134, 
915.0187570880134, 915.0187570880134, 915.0187570880134, 915.0187570880134], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39997/1000000 [26:20<8:37:00, 30.95it/s]global step 40000, trans_decision ep_re 539.9050156363304

{"global_step": 40000, "eval_re": [539.9050156363305, 539.9050156363305, 
539.9050156363305, 539.9050156363305, 539.9050156363305, 539.9050156363305, 
539.9050156363305, 539.9050156363305, 539.9050156363305, 539.9050156363305], 
"eval_len": [413, 413, 413, 413, 413, 413, 413, 413, 413, 413]}

  5%|▍         | 49997/1000000 [33:30<8:26:11, 31.28it/s]global step 50000, trans_decision ep_re 1451.2445449499153

{"global_step": 50000, "eval_re": [1451.2445449499153, 1451.2445449499153, 
1451.2445449499153, 1451.2445449499153, 1451.2445449499153, 1451.2445449499153, 
1451.2445449499153, 1451.2445449499153, 1451.2445449499153, 1451.2445449499153],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59997/1000000 [41:10<8:26:45, 30.92it/s]global step 60000, trans_decision ep_re 867.1430370274862

{"global_step": 60000, "eval_re": [867.1430370274862, 867.1430370274862, 
867.1430370274862, 867.1430370274862, 867.1430370274862, 867.1430370274862, 
867.1430370274862, 867.1430370274862, 867.1430370274862, 867.1430370274862], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69997/1000000 [48:40<8:21:41, 30.90it/s]global step 70000, trans_decision ep_re 783.4250622657817

{"global_step": 70000, "eval_re": [783.4250622657815, 783.4250622657815, 
783.4250622657815, 783.4250622657815, 783.4250622657815, 783.4250622657815, 
783.4250622657815, 783.4250622657815, 783.4250622657815, 783.4250622657815], 
"eval_len": [442, 442, 442, 442, 442, 442, 442, 442, 442, 442]}

  8%|▊         | 79997/1000000 [55:50<8:14:19, 31.02it/s]global step 80000, trans_decision ep_re 1908.935904740981

{"global_step": 80000, "eval_re": [1908.9359047409807, 1908.9359047409807, 
1908.9359047409807, 1908.9359047409807, 1908.9359047409807, 1908.9359047409807, 
1908.9359047409807, 1908.9359047409807, 1908.9359047409807, 1908.9359047409807],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89997/1000000 [1:03:20<8:08:07, 31.07it/s]global step 90000, trans_decision ep_re 524.6683874009516

{"global_step": 90000, "eval_re": [524.6683874009517, 524.6683874009517, 
524.6683874009517, 524.6683874009517, 524.6683874009517, 524.6683874009517, 
524.6683874009517, 524.6683874009517, 524.6683874009517, 524.6683874009517], 
"eval_len": [247, 247, 247, 247, 247, 247, 247, 247, 247, 247]}

 10%|▉         | 99997/1000000 [1:10:10<8:06:46, 30.81it/s]global step 100000, trans_decision ep_re 278.2394501580021

{"global_step": 100000, "eval_re": [278.2394501580021, 278.2394501580021, 
278.2394501580021, 278.2394501580021, 278.2394501580021, 278.2394501580021, 
278.2394501580021, 278.2394501580021, 278.2394501580021, 278.2394501580021], 
"eval_len": [179, 179, 179, 179, 179, 179, 179, 179, 179, 179]}

 11%|█         | 109997/1000000 [1:17:30<8:00:34, 30.87it/s]global step 110000, trans_decision ep_re 308.47103898651307

{"global_step": 110000, "eval_re": [308.47103898651307, 308.47103898651307, 
308.47103898651307, 308.47103898651307, 308.47103898651307, 308.47103898651307, 
308.47103898651307, 308.47103898651307, 308.47103898651307, 308.47103898651307],
"eval_len": [221, 221, 221, 221, 221, 221, 221, 221, 221, 221]}

 12%|█▏        | 119997/1000000 [1:24:30<7:55:38, 30.84it/s]global step 120000, trans_decision ep_re 1015.8725555433787

{"global_step": 120000, "eval_re": [1015.8725555433786, 1015.8725555433786, 
1015.8725555433786, 1015.8725555433786, 1015.8725555433786, 1015.8725555433786, 
1015.8725555433786, 1015.8725555433786, 1015.8725555433786, 1015.8725555433786],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129997/1000000 [1:32:00<7:45:18, 31.16it/s]global step 130000, trans_decision ep_re 1783.3081934113718

{"global_step": 130000, "eval_re": [1791.229854821373, 1791.229854821373, 
1791.229854821373, 1712.0132407213628, 1791.229854821373, 1791.229854821373, 
1791.229854821373, 1791.229854821373, 1791.229854821373, 1791.229854821373], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139997/1000000 [1:39:30<7:32:43, 31.66it/s]global step 140000, trans_decision ep_re 2865.216592753107

{"global_step": 140000, "eval_re": [2865.2165927531073, 2865.2165927531073, 
2865.2165927531073, 2865.2165927531073, 2865.2165927531073, 2865.2165927531073, 
2865.2165927531073, 2865.2165927531073, 2865.2165927531073, 2865.2165927531073],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149997/1000000 [1:47:00<7:38:35, 30.89it/s]global step 150000, trans_decision ep_re 2682.523485238834

{"global_step": 150000, "eval_re": [2682.523485238834, 2682.523485238834, 
2682.523485238834, 2682.523485238834, 2682.523485238834, 2682.523485238834, 
2682.523485238834, 2682.523485238834, 2682.523485238834, 2682.523485238834], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159997/1000000 [1:54:40<7:33:50, 30.85it/s]global step 160000, trans_decision ep_re 2467.0881095613754

{"global_step": 160000, "eval_re": [2419.482286245343, 2589.9226304994845, 
2589.9226304994845, 1532.0177653725352, 2589.9226304994845, 2589.9226304994845, 
2589.9226304994845, 2589.9226304994845, 2589.9226304994845, 2589.9226304994845],
"eval_len": [1000, 1000, 1000, 701, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169997/1000000 [2:02:00<7:28:26, 30.85it/s]global step 170000, trans_decision ep_re 1657.1681711802496

{"global_step": 170000, "eval_re": [1657.1681711802496, 1657.1681711802496, 
1657.1681711802496, 1657.1681711802496, 1657.1681711802496, 1657.1681711802496, 
1657.1681711802496, 1657.1681711802496, 1657.1681711802496, 1657.1681711802496],
"eval_len": [732, 732, 732, 732, 732, 732, 732, 732, 732, 732]}

 18%|█▊        | 179997/1000000 [2:09:30<7:16:50, 31.28it/s]global step 180000, trans_decision ep_re 2388.306216013768

{"global_step": 180000, "eval_re": [2388.3062160137683, 2388.3062160137683, 
2388.3062160137683, 2388.3062160137683, 2388.3062160137683, 2388.3062160137683, 
2388.3062160137683, 2388.3062160137683, 2388.3062160137683, 2388.3062160137683],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189997/1000000 [2:16:42<7:11:05, 31.32it/s]global step 190000, trans_decision ep_re 73.73671013573701

{"global_step": 190000, "eval_re": [73.73671013573703, 73.73671013573703, 
73.73671013573703, 73.73671013573703, 73.73671013573703, 73.73671013573703, 
73.73671013573703, 73.73671013573703, 73.73671013573703, 73.73671013573703], 
"eval_len": [51, 51, 51, 51, 51, 51, 51, 51, 51, 51]}

 20%|█▉        | 199997/1000000 [2:23:41<7:13:39, 30.75it/s]global step 200000, trans_decision ep_re 38.31430650051089

{"global_step": 200000, "eval_re": [38.31430650051089, 38.31430650051089, 
38.31430650051089, 38.31430650051089, 38.31430650051089, 38.31430650051089, 
38.31430650051089, 38.31430650051089, 38.31430650051089, 38.31430650051089], 
"eval_len": [28, 28, 28, 28, 28, 28, 28, 28, 28, 28]}

 21%|██        | 209997/1000000 [2:30:50<7:02:30, 31.16it/s]global step 210000, trans_decision ep_re 2890.020994609694

{"global_step": 210000, "eval_re": [2903.6665612801626, 2766.5096070106715, 
2903.6665612801626, 2903.6665612801626, 2903.6665612801626, 2903.6665612801626, 
2903.6665612801626, 2903.6665612801626, 2903.6665612801626, 2904.3678488449677],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219997/1000000 [2:38:20<6:55:43, 31.27it/s]global step 220000, trans_decision ep_re 21.198314032498587

{"global_step": 220000, "eval_re": [21.198314032498587, 21.198314032498587, 
21.198314032498587, 21.198314032498587, 21.198314032498587, 21.198314032498587, 
21.198314032498587, 21.198314032498587, 21.198314032498587, 21.198314032498587],
"eval_len": [61, 61, 61, 61, 61, 61, 61, 61, 61, 61]}

 23%|██▎       | 229997/1000000 [2:45:20<6:54:17, 30.98it/s]global step 230000, trans_decision ep_re 1638.0288288704644

{"global_step": 230000, "eval_re": [1638.0288288704642, 1638.0288288704642, 
1638.0288288704642, 1638.0288288704642, 1638.0288288704642, 1638.0288288704642, 
1638.0288288704642, 1638.0288288704642, 1638.0288288704642, 1638.0288288704642],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239997/1000000 [2:53:00<6:45:23, 31.25it/s]global step 240000, trans_decision ep_re 2638.910272380047

{"global_step": 240000, "eval_re": [2641.762778353494, 2641.762778353494, 
2641.762778353494, 2641.762778353494, 2641.762778353494, 2641.762778353494, 
2641.762778353494, 2641.762778353494, 2641.762778353494, 2613.2377186190192], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249999/1000000 [3:00:30<6:36:47, 31.50it/s]global step 250000, trans_decision ep_re 2831.4578545174654

{"global_step": 250000, "eval_re": [2831.457854517465, 2831.457854517465, 
2831.457854517465, 2831.457854517465, 2831.457854517465, 2831.457854517465, 
2831.457854517465, 2831.457854517465, 2831.457854517465, 2831.457854517465], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259997/1000000 [3:08:00<6:34:51, 31.24it/s]global step 260000, trans_decision ep_re 2351.9329156273902

{"global_step": 260000, "eval_re": [2335.780439691216, 2335.780439691216, 
2335.780439691216, 2335.780439691216, 2335.780439691216, 2335.780439691216, 
2335.780439691216, 2335.780439691216, 2497.305199052959, 2335.780439691216], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269997/1000000 [3:15:30<6:32:50, 30.97it/s]global step 270000, trans_decision ep_re 2993.321994546925

{"global_step": 270000, "eval_re": [3008.6550831436093, 3008.6550831436093, 
3008.6550831436093, 3008.6550831436093, 2961.9639910617784, 3008.6550831436093, 
3008.6550831436093, 2902.0152892585934, 3008.6550831436093, 3008.6550831436093],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279997/1000000 [3:23:00<6:28:16, 30.91it/s]global step 280000, trans_decision ep_re 2694.6381216399836

{"global_step": 280000, "eval_re": [2694.638121639984, 2694.638121639984, 
2694.638121639984, 2694.638121639984, 2694.638121639984, 2694.638121639984, 
2694.638121639984, 2694.638121639984, 2694.638121639984, 2694.638121639984], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289997/1000000 [3:30:30<6:22:27, 30.94it/s]global step 290000, trans_decision ep_re 2001.7772535531549

{"global_step": 290000, "eval_re": [2950.967651234338, 1896.3116538108013, 
1896.3116538108013, 1896.3116538108013, 1896.3116538108013, 1896.3116538108013, 
1896.3116538108013, 1896.3116538108013, 1896.3116538108013, 1896.3116538108013],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299997/1000000 [3:38:00<6:14:08, 31.18it/s]global step 300000, trans_decision ep_re 2902.759962477589

{"global_step": 300000, "eval_re": [2738.80419941894, 2920.977269484106, 
2920.977269484106, 2920.977269484106, 2920.977269484106, 2920.977269484106, 
2920.977269484106, 2920.977269484106, 2920.977269484106, 2920.977269484106], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309997/1000000 [3:45:30<6:08:37, 31.20it/s]global step 310000, trans_decision ep_re 3148.5196527037333

{"global_step": 310000, "eval_re": [3148.5196527037333, 3148.5196527037333, 
3148.5196527037333, 3148.5196527037333, 3148.5196527037333, 3148.5196527037333, 
3148.5196527037333, 3148.5196527037333, 3148.5196527037333, 3148.5196527037333],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319997/1000000 [3:53:00<6:06:19, 30.94it/s]global step 320000, trans_decision ep_re 2307.8479685177736

{"global_step": 320000, "eval_re": [2298.6483714756228, 2298.6483714756228, 
2298.6483714756228, 2298.6483714756228, 2298.6483714756228, 2298.6483714756228, 
2390.6443418971317, 2298.6483714756228, 2298.6483714756228, 2298.6483714756228],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329997/1000000 [4:00:30<5:59:35, 31.05it/s]global step 330000, trans_decision ep_re 2833.0258244776533

{"global_step": 330000, "eval_re": [2833.0258244776533, 2833.0258244776533, 
2833.0258244776533, 2833.0258244776533, 2833.0258244776533, 2833.0258244776533, 
2833.0258244776533, 2833.0258244776533, 2833.0258244776533, 2833.0258244776533],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339997/1000000 [4:08:00<5:59:49, 30.57it/s]global step 340000, trans_decision ep_re 2857.997009554116

{"global_step": 340000, "eval_re": [2855.186085940344, 2855.186085940344, 
2855.186085940344, 2855.186085940344, 2883.2953220780614, 2855.186085940344, 
2855.186085940344, 2855.186085940344, 2855.186085940344, 2855.186085940344], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349997/1000000 [4:15:30<5:52:22, 30.74it/s]global step 350000, trans_decision ep_re 2628.392994050156

{"global_step": 350000, "eval_re": [2636.407737328996, 2556.2603045406026, 
2636.407737328996, 2636.407737328996, 2636.407737328996, 2636.407737328996, 
2636.407737328996, 2636.407737328996, 2636.407737328996, 2636.407737328996], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359997/1000000 [4:23:00<5:47:08, 30.73it/s]global step 360000, trans_decision ep_re -207.51596298576618

{"global_step": 360000, "eval_re": [-207.5159629857662, -207.5159629857662, 
-207.5159629857662, -207.5159629857662, -207.5159629857662, -207.5159629857662, 
-207.5159629857662, -207.5159629857662, -207.5159629857662, -207.5159629857662],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369997/1000000 [4:30:30<5:41:01, 30.79it/s]global step 370000, trans_decision ep_re 2829.819360039789

{"global_step": 370000, "eval_re": [2829.819360039789, 2829.819360039789, 
2829.819360039789, 2829.819360039789, 2829.819360039789, 2829.819360039789, 
2829.819360039789, 2829.819360039789, 2829.819360039789, 2829.819360039789], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379997/1000000 [4:38:00<5:34:12, 30.92it/s]global step 380000, trans_decision ep_re 2696.9291649367815

{"global_step": 380000, "eval_re": [2693.2333097521073, 2693.2333097521073, 
2693.2333097521073, 2693.2333097521073, 2693.2333097521073, 2693.2333097521073, 
2693.2333097521073, 2730.1918615988493, 2693.2333097521073, 2693.2333097521073],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389997/1000000 [4:45:30<5:25:10, 31.27it/s]global step 390000, trans_decision ep_re 2905.531090000911

{"global_step": 390000, "eval_re": [2905.5310900009113, 2905.5310900009113, 
2905.5310900009113, 2905.5310900009113, 2905.5310900009113, 2905.5310900009113, 
2905.5310900009113, 2905.5310900009113, 2905.5310900009113, 2905.5310900009113],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [4:53:00<5:19:45, 31.27it/s]global step 400000, trans_decision ep_re 3175.2021992453515

{"global_step": 400000, "eval_re": [3175.2021992453515, 3175.2021992453515, 
3175.2021992453515, 3175.2021992453515, 3175.2021992453515, 3175.2021992453515, 
3175.2021992453515, 3175.2021992453515, 3175.2021992453515, 3175.2021992453515],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409997/1000000 [5:00:30<5:17:58, 30.92it/s]global step 410000, trans_decision ep_re 2535.281794226507

{"global_step": 410000, "eval_re": [2535.281794226507, 2535.281794226507, 
2535.281794226507, 2535.281794226507, 2535.281794226507, 2535.281794226507, 
2535.281794226507, 2535.281794226507, 2535.281794226507, 2535.281794226507], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419996/1000000 [5:07:50<5:07:39, 31.42it/s]global step 420000, trans_decision ep_re 2970.681364283241

{"global_step": 420000, "eval_re": [2970.6813642832403, 2970.6813642832403, 
2970.6813642832403, 2970.6813642832403, 2970.6813642832403, 2970.6813642832403, 
2970.6813642832403, 2970.6813642832403, 2970.6813642832403, 2970.6813642832403],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429997/1000000 [5:15:20<5:00:49, 31.58it/s]global step 430000, trans_decision ep_re 3052.728423700314

{"global_step": 430000, "eval_re": [3052.728423700314, 3052.728423700314, 
3052.728423700314, 3052.728423700314, 3052.728423700314, 3052.728423700314, 
3052.728423700314, 3052.728423700314, 3052.728423700314, 3052.728423700314], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439997/1000000 [5:22:40<4:57:23, 31.38it/s]global step 440000, trans_decision ep_re 2996.8881655400437

{"global_step": 440000, "eval_re": [3001.6191698262733, 2954.309126963976, 
3001.6191698262733, 3001.6191698262733, 3001.6191698262733, 3001.6191698262733, 
3001.6191698262733, 3001.6191698262733, 3001.6191698262733, 3001.6191698262733],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449997/1000000 [5:30:10<4:54:31, 31.12it/s]global step 450000, trans_decision ep_re 3102.647828238033

{"global_step": 450000, "eval_re": [3104.448165913189, 3104.448165913189, 
3104.448165913189, 3104.448165913189, 3104.448165913189, 3104.448165913189, 
3086.444789161633, 3104.448165913189, 3104.448165913189, 3104.448165913189], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459997/1000000 [5:37:40<4:48:18, 31.22it/s]global step 460000, trans_decision ep_re 1396.117304893412

{"global_step": 460000, "eval_re": [1396.1173048934118, 1396.1173048934118, 
1396.1173048934118, 1396.1173048934118, 1396.1173048934118, 1396.1173048934118, 
1396.1173048934118, 1396.1173048934118, 1396.1173048934118, 1396.1173048934118],
"eval_len": [738, 738, 738, 738, 738, 738, 738, 738, 738, 738]}

 47%|████▋     | 469997/1000000 [5:45:00<4:45:55, 30.89it/s]global step 470000, trans_decision ep_re 2728.1355089966046

{"global_step": 470000, "eval_re": [2718.5064112520167, 2718.5064112520167, 
2718.5064112520167, 2718.5064112520167, 2814.7973886978953, 2718.5064112520167, 
2718.5064112520167, 2718.5064112520167, 2718.5064112520167, 2718.5064112520167],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479997/1000000 [5:52:30<4:40:18, 30.92it/s]global step 480000, trans_decision ep_re 3116.200908114023

{"global_step": 480000, "eval_re": [3116.2009081140227, 3116.2009081140227, 
3116.2009081140227, 3116.2009081140227, 3116.2009081140227, 3116.2009081140227, 
3116.2009081140227, 3116.2009081140227, 3116.2009081140227, 3116.2009081140227],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489997/1000000 [6:00:00<4:30:14, 31.45it/s]global step 490000, trans_decision ep_re 2833.808999615883

{"global_step": 490000, "eval_re": [2833.8089996158833, 2833.8089996158833, 
2833.8089996158833, 2833.8089996158833, 2833.8089996158833, 2833.8089996158833, 
2833.8089996158833, 2833.8089996158833, 2833.8089996158833, 2833.8089996158833],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499997/1000000 [6:07:20<4:25:45, 31.36it/s]global step 500000, trans_decision ep_re 2993.751156250729

{"global_step": 500000, "eval_re": [2993.751156250729, 2993.751156250729, 
2993.751156250729, 2993.751156250729, 2993.751156250729, 2993.751156250729, 
2993.751156250729, 2993.751156250729, 2993.751156250729, 2993.751156250729], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509997/1000000 [6:14:50<4:19:15, 31.50it/s]global step 510000, trans_decision ep_re 3004.882199880106

{"global_step": 510000, "eval_re": [3004.882199880106, 3004.882199880106, 
3004.882199880106, 3004.882199880106, 3004.882199880106, 3004.882199880106, 
3004.882199880106, 3004.882199880106, 3004.882199880106, 3004.882199880106], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519997/1000000 [6:22:10<4:15:05, 31.36it/s]global step 520000, trans_decision ep_re 2175.9549411291923

{"global_step": 520000, "eval_re": [2076.187042305492, 2076.187042305492, 
2076.187042305492, 2076.187042305492, 2076.187042305492, 2076.187042305492, 
3073.8660305424914, 2076.187042305492, 2076.187042305492, 2076.187042305492], 
"eval_len": [685, 685, 685, 685, 685, 685, 1000, 685, 685, 685]}

 53%|█████▎    | 529997/1000000 [6:29:30<4:13:16, 30.93it/s]global step 530000, trans_decision ep_re 3103.41680752063

{"global_step": 530000, "eval_re": [3103.41680752063, 3103.41680752063, 
3103.41680752063, 3103.41680752063, 3103.41680752063, 3103.41680752063, 
3103.41680752063, 3103.41680752063, 3103.41680752063, 3103.41680752063], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539997/1000000 [6:37:00<4:09:37, 30.71it/s]global step 540000, trans_decision ep_re 2962.54260968842

{"global_step": 540000, "eval_re": [2962.5426096884203, 2962.5426096884203, 
2962.5426096884203, 2962.5426096884203, 2962.5426096884203, 2962.5426096884203, 
2962.5426096884203, 2962.5426096884203, 2962.5426096884203, 2962.5426096884203],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549997/1000000 [6:44:30<3:58:42, 31.42it/s]global step 550000, trans_decision ep_re 3237.440004013131

{"global_step": 550000, "eval_re": [3237.4400040131313, 3237.4400040131313, 
3237.4400040131313, 3237.4400040131313, 3237.4400040131313, 3237.4400040131313, 
3237.4400040131313, 3237.4400040131313, 3237.4400040131313, 3237.4400040131313],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559997/1000000 [6:51:50<3:52:31, 31.54it/s]global step 560000, trans_decision ep_re 2304.6541384759375

{"global_step": 560000, "eval_re": [2304.654138475938, 2304.654138475938, 
2304.654138475938, 2304.654138475938, 2304.654138475938, 2304.654138475938, 
2304.654138475938, 2304.654138475938, 2304.654138475938, 2304.654138475938], 
"eval_len": [810, 810, 810, 810, 810, 810, 810, 810, 810, 810]}

 57%|█████▋    | 569997/1000000 [6:59:10<3:49:37, 31.21it/s]global step 570000, trans_decision ep_re 3394.3757253882104

{"global_step": 570000, "eval_re": [3394.263234864023, 3394.263234864023, 
3394.263234864023, 3394.263234864023, 3394.263234864023, 3394.263234864023, 
3394.263234864023, 3395.3881401058993, 3394.263234864023, 3394.263234864023], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579997/1000000 [7:06:40<3:42:37, 31.44it/s]global step 580000, trans_decision ep_re 2965.102850433602

{"global_step": 580000, "eval_re": [2965.102850433602, 2965.102850433602, 
2965.102850433602, 2965.102850433602, 2965.102850433602, 2965.102850433602, 
2965.102850433602, 2965.102850433602, 2965.102850433602, 2965.102850433602], 
"eval_len": [956, 956, 956, 956, 956, 956, 956, 956, 956, 956]}

 59%|█████▉    | 589997/1000000 [7:14:00<3:39:58, 31.06it/s]global step 590000, trans_decision ep_re 3367.981850319765

{"global_step": 590000, "eval_re": [3366.858819932132, 3366.858819932132, 
3366.858819932132, 3366.858819932132, 3366.858819932132, 3366.858819932132, 
3366.858819932132, 3366.858819932132, 3366.858819932132, 3378.0891238084596], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599997/1000000 [7:21:30<3:36:33, 30.78it/s]global step 600000, trans_decision ep_re 3143.691959513274

{"global_step": 600000, "eval_re": [3143.6919595132736, 3143.6919595132736, 
3143.6919595132736, 3143.6919595132736, 3143.6919595132736, 3143.6919595132736, 
3143.6919595132736, 3143.6919595132736, 3143.6919595132736, 3143.6919595132736],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609997/1000000 [7:29:00<3:29:19, 31.05it/s]global step 610000, trans_decision ep_re 3390.516068768566

{"global_step": 610000, "eval_re": [3390.516068768566, 3390.516068768566, 
3390.516068768566, 3390.516068768566, 3390.516068768566, 3390.516068768566, 
3390.516068768566, 3390.516068768566, 3390.516068768566, 3390.516068768566], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619997/1000000 [7:36:30<3:21:30, 31.43it/s]global step 620000, trans_decision ep_re 2880.3729334910195

{"global_step": 620000, "eval_re": [2880.372933491019, 2880.372933491019, 
2880.372933491019, 2880.372933491019, 2880.372933491019, 2880.372933491019, 
2880.372933491019, 2880.372933491019, 2880.372933491019, 2880.372933491019], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629997/1000000 [7:44:00<3:13:32, 31.86it/s]global step 630000, trans_decision ep_re 3367.229440412001

{"global_step": 630000, "eval_re": [3367.2294404120007, 3367.2294404120007, 
3367.2294404120007, 3367.2294404120007, 3367.2294404120007, 3367.2294404120007, 
3367.2294404120007, 3367.2294404120007, 3367.2294404120007, 3367.2294404120007],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639997/1000000 [7:51:20<3:15:39, 30.67it/s]global step 640000, trans_decision ep_re 3092.0908058144896

{"global_step": 640000, "eval_re": [3092.0908058144896, 3092.0908058144896, 
3092.0908058144896, 3092.0908058144896, 3092.0908058144896, 3092.0908058144896, 
3092.0908058144896, 3092.0908058144896, 3092.0908058144896, 3092.0908058144896],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649997/1000000 [7:58:50<3:08:59, 30.87it/s]global step 650000, trans_decision ep_re 3354.4367564372888

{"global_step": 650000, "eval_re": [3354.4367564372888, 3354.4367564372888, 
3354.4367564372888, 3354.4367564372888, 3354.4367564372888, 3354.4367564372888, 
3354.4367564372888, 3354.4367564372888, 3354.4367564372888, 3354.4367564372888],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659997/1000000 [8:06:20<3:00:18, 31.43it/s]global step 660000, trans_decision ep_re 3127.392025899003

{"global_step": 660000, "eval_re": [3131.956514710593, 3131.956514710593, 
3131.956514710593, 3086.311626594694, 3131.956514710593, 3131.956514710593, 
3131.956514710593, 3131.956514710593, 3131.956514710593, 3131.956514710593], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669997/1000000 [8:13:50<2:55:34, 31.33it/s]global step 670000, trans_decision ep_re 3149.2713802552053

{"global_step": 670000, "eval_re": [3146.9504258428187, 3146.9504258428187, 
3146.9504258428187, 3146.9504258428187, 3170.159969966684, 3146.9504258428187, 
3146.9504258428187, 3146.9504258428187, 3146.9504258428187, 3146.9504258428187],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679997/1000000 [8:21:10<2:50:09, 31.34it/s]global step 680000, trans_decision ep_re 3053.5277511089835

{"global_step": 680000, "eval_re": [3053.5277511089835, 3053.5277511089835, 
3053.5277511089835, 3053.5277511089835, 3053.5277511089835, 3053.5277511089835, 
3053.5277511089835, 3053.5277511089835, 3053.5277511089835, 3053.5277511089835],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689997/1000000 [8:28:40<2:46:15, 31.08it/s]global step 690000, trans_decision ep_re 2814.6305524885984

{"global_step": 690000, "eval_re": [2814.630552488599, 2814.630552488599, 
2814.630552488599, 2814.630552488599, 2814.630552488599, 2814.630552488599, 
2814.630552488599, 2814.630552488599, 2814.630552488599, 2814.630552488599], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699997/1000000 [8:36:00<2:39:55, 31.27it/s]global step 700000, trans_decision ep_re 3414.694098629521

{"global_step": 700000, "eval_re": [3414.694098629521, 3414.694098629521, 
3414.694098629521, 3414.694098629521, 3414.694098629521, 3414.694098629521, 
3414.694098629521, 3414.694098629521, 3414.694098629521, 3414.694098629521], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709997/1000000 [8:43:12<2:32:13, 31.75it/s]global step 710000, trans_decision ep_re 26.625328539229933

{"global_step": 710000, "eval_re": [26.625328539229933, 26.625328539229933, 
26.625328539229933, 26.625328539229933, 26.625328539229933, 26.625328539229933, 
26.625328539229933, 26.625328539229933, 26.625328539229933, 26.625328539229933],
"eval_len": [27, 27, 27, 27, 27, 27, 27, 27, 27, 27]}

 72%|███████▏  | 719997/1000000 [8:50:20<2:27:21, 31.67it/s]global step 720000, trans_decision ep_re 2934.817250465635

{"global_step": 720000, "eval_re": [2935.9988966782266, 2935.9988966782266, 
2935.9988966782266, 2935.9988966782266, 2935.9988966782266, 2935.9988966782266, 
2924.182434552315, 2935.9988966782266, 2935.9988966782266, 2935.9988966782266], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729997/1000000 [8:57:40<2:23:22, 31.39it/s]global step 730000, trans_decision ep_re 3127.3839332708258

{"global_step": 730000, "eval_re": [3127.3839332708253, 3127.3839332708253, 
3127.3839332708253, 3127.3839332708253, 3127.3839332708253, 3127.3839332708253, 
3127.3839332708253, 3127.3839332708253, 3127.3839332708253, 3127.3839332708253],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739997/1000000 [9:05:00<2:14:28, 32.22it/s]global step 740000, trans_decision ep_re 3064.4224654174914

{"global_step": 740000, "eval_re": [3064.422465417492, 3064.422465417492, 
3064.422465417492, 3064.422465417492, 3064.422465417492, 3064.422465417492, 
3064.422465417492, 3064.422465417492, 3064.422465417492, 3064.422465417492], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749997/1000000 [9:12:20<2:12:18, 31.49it/s]global step 750000, trans_decision ep_re 3079.43027910362

{"global_step": 750000, "eval_re": [3079.43027910362, 3079.43027910362, 
3079.43027910362, 3079.43027910362, 3079.43027910362, 3079.43027910362, 
3079.43027910362, 3079.43027910362, 3079.43027910362, 3079.43027910362], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759997/1000000 [9:20:00<2:08:27, 31.14it/s]global step 760000, trans_decision ep_re 3256.365907785831

{"global_step": 760000, "eval_re": [3256.365907785831, 3256.365907785831, 
3256.365907785831, 3256.365907785831, 3256.365907785831, 3256.365907785831, 
3256.365907785831, 3256.365907785831, 3256.365907785831, 3256.365907785831], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769997/1000000 [9:27:20<2:02:46, 31.22it/s]global step 770000, trans_decision ep_re 3332.173038029638

{"global_step": 770000, "eval_re": [3332.173038029638, 3332.173038029638, 
3332.173038029638, 3332.173038029638, 3332.173038029638, 3332.173038029638, 
3332.173038029638, 3332.173038029638, 3332.173038029638, 3332.173038029638], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779997/1000000 [9:34:50<1:57:06, 31.31it/s]global step 780000, trans_decision ep_re 3584.583821316931

{"global_step": 780000, "eval_re": [3584.583821316931, 3584.583821316931, 
3584.583821316931, 3584.583821316931, 3584.583821316931, 3584.583821316931, 
3584.583821316931, 3584.583821316931, 3584.583821316931, 3584.583821316931], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789997/1000000 [9:42:20<1:52:25, 31.13it/s]global step 790000, trans_decision ep_re 3255.2935477381025

{"global_step": 790000, "eval_re": [3258.6143940385527, 3258.6143940385527, 
3258.6143940385527, 3258.6143940385527, 3225.4059310340513, 3258.6143940385527, 
3258.6143940385527, 3258.6143940385527, 3258.6143940385527, 3258.6143940385527],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799997/1000000 [9:49:50<1:47:24, 31.03it/s]global step 800000, trans_decision ep_re 3377.6123203053585

{"global_step": 800000, "eval_re": [3377.6123203053585, 3377.6123203053585, 
3377.6123203053585, 3377.6123203053585, 3377.6123203053585, 3377.6123203053585, 
3377.6123203053585, 3377.6123203053585, 3377.6123203053585, 3377.6123203053585],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809997/1000000 [9:57:20<1:42:48, 30.80it/s]global step 810000, trans_decision ep_re 3453.555314225214

{"global_step": 810000, "eval_re": [3415.0060967551813, 3457.838560610773, 
3457.838560610773, 3457.838560610773, 3457.838560610773, 3457.838560610773, 
3457.838560610773, 3457.838560610773, 3457.838560610773, 3457.838560610773], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819997/1000000 [10:04:50<1:36:24, 31.12it/s]global step 820000, trans_decision ep_re 3408.0326779735137

{"global_step": 820000, "eval_re": [3408.0326779735137, 3408.0326779735137, 
3408.0326779735137, 3408.0326779735137, 3408.0326779735137, 3408.0326779735137, 
3408.0326779735137, 3408.0326779735137, 3408.0326779735137, 3408.0326779735137],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829997/1000000 [10:12:20<1:31:36, 30.93it/s]global step 830000, trans_decision ep_re 3363.6970117209507

{"global_step": 830000, "eval_re": [3363.697011720951, 3363.697011720951, 
3363.697011720951, 3363.697011720951, 3363.697011720951, 3363.697011720951, 
3363.697011720951, 3363.697011720951, 3363.697011720951, 3363.697011720951], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839997/1000000 [10:19:50<1:25:16, 31.27it/s]global step 840000, trans_decision ep_re 3191.943416337262

{"global_step": 840000, "eval_re": [3188.911792990499, 3188.911792990499, 
3188.911792990499, 3188.911792990499, 3188.911792990499, 3188.911792990499, 
3188.911792990499, 3188.911792990499, 3219.228026458128, 3188.911792990499], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849997/1000000 [10:27:10<1:20:02, 31.23it/s]global step 850000, trans_decision ep_re 3435.9241863255493

{"global_step": 850000, "eval_re": [3447.142168933381, 3334.962342855059, 
3447.142168933381, 3447.142168933381, 3447.142168933381, 3447.142168933381, 
3447.142168933381, 3447.142168933381, 3447.142168933381, 3447.142168933381], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859997/1000000 [10:34:40<1:14:03, 31.51it/s]global step 860000, trans_decision ep_re 3479.393374045894

{"global_step": 860000, "eval_re": [3501.6010930049097, 3501.6010930049097, 
3501.6010930049097, 3501.6010930049097, 3501.6010930049097, 3501.6010930049097, 
3501.6010930049097, 3501.6010930049097, 3501.6010930049097, 3279.523903414754], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869997/1000000 [10:42:00<1:08:25, 31.66it/s]global step 870000, trans_decision ep_re 3564.339433995109

{"global_step": 870000, "eval_re": [3573.5499129095792, 3573.5499129095792, 
3573.5499129095792, 3573.5499129095792, 3573.5499129095792, 3573.5499129095792, 
3573.5499129095792, 3573.5499129095792, 3573.5499129095792, 3481.445123764879], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879997/1000000 [10:49:20<1:02:41, 31.90it/s]global step 880000, trans_decision ep_re 3263.896262119357

{"global_step": 880000, "eval_re": [3263.896262119357, 3263.896262119357, 
3263.896262119357, 3263.896262119357, 3263.896262119357, 3263.896262119357, 
3263.896262119357, 3263.896262119357, 3263.896262119357, 3263.896262119357], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889997/1000000 [10:56:40<57:36, 31.83it/s]global step 890000, trans_decision ep_re 3638.482830026832

{"global_step": 890000, "eval_re": [3641.5819902026387, 3641.5819902026387, 
3610.59038844457, 3641.5819902026387, 3641.5819902026387, 3641.5819902026387, 
3641.5819902026387, 3641.5819902026387, 3641.5819902026387, 3641.5819902026387],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [11:04:00<52:33, 31.71it/s]global step 900000, trans_decision ep_re 3623.0825701987633

{"global_step": 900000, "eval_re": [3623.0825701987633, 3623.0825701987633, 
3623.0825701987633, 3623.0825701987633, 3623.0825701987633, 3623.0825701987633, 
3623.0825701987633, 3623.0825701987633, 3623.0825701987633, 3623.0825701987633],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909997/1000000 [11:11:20<47:35, 31.52it/s]global step 910000, trans_decision ep_re 3445.932135696478

{"global_step": 910000, "eval_re": [3445.9321356964774, 3445.9321356964774, 
3445.9321356964774, 3445.9321356964774, 3445.9321356964774, 3445.9321356964774, 
3445.9321356964774, 3445.9321356964774, 3445.9321356964774, 3445.9321356964774],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919997/1000000 [11:18:40<41:36, 32.05it/s]global step 920000, trans_decision ep_re 3359.179688743472

{"global_step": 920000, "eval_re": [3359.6589417170526, 3359.6589417170526, 
3359.6589417170526, 3359.6589417170526, 3359.6589417170526, 3359.6589417170526, 
3359.6589417170526, 3359.6589417170526, 3354.8664119812506, 3359.6589417170526],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929999/1000000 [11:26:00<37:09, 31.40it/s]global step 930000, trans_decision ep_re 3511.046843070932

{"global_step": 930000, "eval_re": [3511.046843070932, 3511.046843070932, 
3511.046843070932, 3511.046843070932, 3511.046843070932, 3511.046843070932, 
3511.046843070932, 3511.046843070932, 3511.046843070932, 3511.046843070932], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939997/1000000 [11:33:20<31:26, 31.81it/s]global step 940000, trans_decision ep_re 3293.843800194253

{"global_step": 940000, "eval_re": [3293.843800194253, 3293.843800194253, 
3293.843800194253, 3293.843800194253, 3293.843800194253, 3293.843800194253, 
3293.843800194253, 3293.843800194253, 3293.843800194253, 3293.843800194253], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949997/1000000 [11:40:40<26:01, 32.02it/s]global step 950000, trans_decision ep_re 3233.689332148174

{"global_step": 950000, "eval_re": [3233.6893321481743, 3233.6893321481743, 
3233.6893321481743, 3233.6893321481743, 3233.6893321481743, 3233.6893321481743, 
3233.6893321481743, 3233.6893321481743, 3233.6893321481743, 3233.6893321481743],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959997/1000000 [11:48:00<20:56, 31.83it/s]global step 960000, trans_decision ep_re 3599.109736782557

{"global_step": 960000, "eval_re": [3626.3988485696827, 3626.3988485696827, 
3626.3988485696827, 3626.3988485696827, 3626.3988485696827, 3626.3988485696827, 
3626.3988485696827, 3626.3988485696827, 3626.3988485696827, 3353.5077306984217],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969997/1000000 [11:55:20<15:33, 32.14it/s]global step 970000, trans_decision ep_re 3243.6282843751387

{"global_step": 970000, "eval_re": [3243.6282843751383, 3243.6282843751383, 
3243.6282843751383, 3243.6282843751383, 3243.6282843751383, 3243.6282843751383, 
3243.6282843751383, 3243.6282843751383, 3243.6282843751383, 3243.6282843751383],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979997/1000000 [12:02:40<10:34, 31.50it/s]global step 980000, trans_decision ep_re 3307.7448267224

{"global_step": 980000, "eval_re": [3307.7448267223995, 3307.7448267223995, 
3307.7448267223995, 3307.7448267223995, 3307.7448267223995, 3307.7448267223995, 
3307.7448267223995, 3307.7448267223995, 3307.7448267223995, 3307.7448267223995],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989997/1000000 [12:09:43<05:12, 32.03it/s]global step 990000, trans_decision ep_re 35.87753350067253

{"global_step": 990000, "eval_re": [35.87753350067253, 35.87753350067253, 
35.87753350067253, 35.87753350067253, 35.87753350067253, 35.87753350067253, 
35.87753350067253, 35.87753350067253, 35.87753350067253, 35.87753350067253], 
"eval_len": [38, 38, 38, 38, 38, 38, 38, 38, 38, 38]}

100%|█████████▉| 999997/1000000 [12:16:50<00:00, 32.07it/s]global step 1000000, trans_decision ep_re 3178.697078771843

{"global_step": 1000000, "eval_re": [3168.035173610076, 3195.1226059917935, 
3195.1226059917935, 3195.1226059917935, 3195.1226059917935, 3195.1226059917935, 
3195.1226059917935, 3057.9547661740044, 3195.1226059917935, 3195.1226059917935],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [12:17:15<00:00, 22.61it/s]
