
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.0
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9998/1000000 [06:30<14:37:14, 18.81it/s]global step 10000, trans_decision ep_re 368.2697057130582

{"global_step": 10000, "eval_re": [368.2697057130582, 368.2697057130582, 
368.2697057130582, 368.2697057130582, 368.2697057130582, 368.2697057130582, 
368.2697057130582, 368.2697057130582, 368.2697057130582, 368.2697057130582], 
"eval_len": [69, 69, 69, 69, 69, 69, 69, 69, 69, 69]}

  2%|▏         | 19998/1000000 [18:50<14:49:59, 18.35it/s]global step 20000, trans_decision ep_re 240.96693386656793

{"global_step": 20000, "eval_re": [240.96693386656793, 240.96693386656793, 
240.96693386656793, 240.96693386656793, 240.96693386656793, 240.96693386656793, 
240.96693386656793, 240.96693386656793, 240.96693386656793, 240.96693386656793],
"eval_len": [46, 46, 46, 46, 46, 46, 46, 46, 46, 46]}

  3%|▎         | 29998/1000000 [31:10<14:32:43, 18.52it/s]global step 30000, trans_decision ep_re 308.60894175124287

{"global_step": 30000, "eval_re": [308.6089417512428, 308.6089417512428, 
308.6089417512428, 308.6089417512428, 308.6089417512428, 308.6089417512428, 
308.6089417512428, 308.6089417512428, 308.6089417512428, 308.6089417512428], 
"eval_len": [58, 58, 58, 58, 58, 58, 58, 58, 58, 58]}

  4%|▍         | 39998/1000000 [43:30<14:18:07, 18.65it/s]global step 40000, trans_decision ep_re 263.9246193577466

{"global_step": 40000, "eval_re": [263.9246193577467, 263.9246193577467, 
263.9246193577467, 263.9246193577467, 263.9246193577467, 263.9246193577467, 
263.9246193577467, 263.9246193577467, 263.9246193577467, 263.9246193577467], 
"eval_len": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}

  5%|▍         | 49998/1000000 [55:50<14:10:12, 18.62it/s]global step 50000, trans_decision ep_re 285.2593950225264

{"global_step": 50000, "eval_re": [285.2593950225264, 285.2593950225264, 
285.2593950225264, 285.2593950225264, 285.2593950225264, 285.2593950225264, 
285.2593950225264, 285.2593950225264, 285.2593950225264, 285.2593950225264], 
"eval_len": [54, 54, 54, 54, 54, 54, 54, 54, 54, 54]}

  6%|▌         | 59998/1000000 [1:08:10<13:56:58, 18.72it/s]global step 60000, trans_decision ep_re 348.89888855932037

{"global_step": 60000, "eval_re": [348.89888855932037, 348.89888855932037, 
348.89888855932037, 348.89888855932037, 348.89888855932037, 348.89888855932037, 
348.89888855932037, 348.89888855932037, 348.89888855932037, 348.89888855932037],
"eval_len": [65, 65, 65, 65, 65, 65, 65, 65, 65, 65]}

  7%|▋         | 69998/1000000 [1:20:20<13:47:53, 18.72it/s]global step 70000, trans_decision ep_re 386.9621093557067

{"global_step": 70000, "eval_re": [386.9621093557067, 386.9621093557067, 
386.9621093557067, 386.9621093557067, 386.9621093557067, 386.9621093557067, 
386.9621093557067, 386.9621093557067, 386.9621093557067, 386.9621093557067], 
"eval_len": [72, 72, 72, 72, 72, 72, 72, 72, 72, 72]}

  8%|▊         | 79998/1000000 [1:32:40<13:36:39, 18.78it/s]global step 80000, trans_decision ep_re 290.6706984260835

{"global_step": 80000, "eval_re": [290.67069842608345, 290.67069842608345, 
290.67069842608345, 290.67069842608345, 290.67069842608345, 290.67069842608345, 
290.67069842608345, 290.67069842608345, 290.67069842608345, 290.67069842608345],
"eval_len": [55, 55, 55, 55, 55, 55, 55, 55, 55, 55]}

  9%|▉         | 89998/1000000 [1:45:00<13:33:20, 18.65it/s]global step 90000, trans_decision ep_re 403.1955996924526

{"global_step": 90000, "eval_re": [403.1955996924526, 403.1955996924526, 
403.1955996924526, 403.1955996924526, 403.1955996924526, 403.1955996924526, 
403.1955996924526, 403.1955996924526, 403.1955996924526, 403.1955996924526], 
"eval_len": [74, 74, 74, 74, 74, 74, 74, 74, 74, 74]}

 10%|▉         | 99998/1000000 [1:57:20<13:19:56, 18.75it/s]global step 100000, trans_decision ep_re 367.11689608872155

{"global_step": 100000, "eval_re": [367.1168960887215, 367.1168960887215, 
367.1168960887215, 367.1168960887215, 367.1168960887215, 367.1168960887215, 
367.1168960887215, 367.1168960887215, 367.1168960887215, 367.1168960887215], 
"eval_len": [68, 68, 68, 68, 68, 68, 68, 68, 68, 68]}

 11%|█         | 109998/1000000 [2:09:30<13:05:32, 18.88it/s]global step 110000, trans_decision ep_re 408.0297201004901

{"global_step": 110000, "eval_re": [408.0297201004901, 408.0297201004901, 
408.0297201004901, 408.0297201004901, 408.0297201004901, 408.0297201004901, 
408.0297201004901, 408.0297201004901, 408.0297201004901, 408.0297201004901], 
"eval_len": [75, 75, 75, 75, 75, 75, 75, 75, 75, 75]}

 12%|█▏        | 119998/1000000 [2:21:50<12:52:49, 18.98it/s]global step 120000, trans_decision ep_re 290.0631947089615

{"global_step": 120000, "eval_re": [290.0631947089615, 290.0631947089615, 
290.0631947089615, 290.0631947089615, 290.0631947089615, 290.0631947089615, 
290.0631947089615, 290.0631947089615, 290.0631947089615, 290.0631947089615], 
"eval_len": [55, 55, 55, 55, 55, 55, 55, 55, 55, 55]}

 13%|█▎        | 129998/1000000 [2:34:10<12:46:39, 18.91it/s]global step 130000, trans_decision ep_re 273.5232744123297

{"global_step": 130000, "eval_re": [273.5232744123298, 273.5232744123298, 
273.5232744123298, 273.5232744123298, 273.5232744123298, 273.5232744123298, 
273.5232744123298, 273.5232744123298, 273.5232744123298, 273.5232744123298], 
"eval_len": [52, 52, 52, 52, 52, 52, 52, 52, 52, 52]}

 14%|█▍        | 139998/1000000 [2:46:20<12:43:07, 18.78it/s]global step 140000, trans_decision ep_re 401.0268477815297

{"global_step": 140000, "eval_re": [401.0268477815296, 401.0268477815296, 
401.0268477815296, 401.0268477815296, 401.0268477815296, 401.0268477815296, 
401.0268477815296, 401.0268477815296, 401.0268477815296, 401.0268477815296], 
"eval_len": [74, 74, 74, 74, 74, 74, 74, 74, 74, 74]}

 15%|█▍        | 149998/1000000 [2:58:40<12:37:20, 18.71it/s]global step 150000, trans_decision ep_re 380.18083971585963

{"global_step": 150000, "eval_re": [380.1808397158596, 380.1808397158596, 
380.1808397158596, 380.1808397158596, 380.1808397158596, 380.1808397158596, 
380.1808397158596, 380.1808397158596, 380.1808397158596, 380.1808397158596], 
"eval_len": [71, 71, 71, 71, 71, 71, 71, 71, 71, 71]}

 16%|█▌        | 159998/1000000 [3:11:00<12:28:33, 18.70it/s]global step 160000, trans_decision ep_re 393.0208949492844

{"global_step": 160000, "eval_re": [393.0208949492844, 393.0208949492844, 
393.0208949492844, 393.0208949492844, 393.0208949492844, 393.0208949492844, 
393.0208949492844, 393.0208949492844, 393.0208949492844, 393.0208949492844], 
"eval_len": [73, 73, 73, 73, 73, 73, 73, 73, 73, 73]}

 17%|█▋        | 169998/1000000 [3:23:20<12:16:56, 18.77it/s]global step 170000, trans_decision ep_re 394.56696952739867

{"global_step": 170000, "eval_re": [394.56696952739867, 394.56696952739867, 
394.56696952739867, 394.56696952739867, 394.56696952739867, 394.56696952739867, 
394.56696952739867, 394.56696952739867, 394.56696952739867, 394.56696952739867],
"eval_len": [73, 73, 73, 73, 73, 73, 73, 73, 73, 73]}

 18%|█▊        | 179998/1000000 [3:35:40<12:09:21, 18.74it/s]global step 180000, trans_decision ep_re 389.14959625104996

{"global_step": 180000, "eval_re": [389.14959625104996, 389.14959625104996, 
389.14959625104996, 389.14959625104996, 389.14959625104996, 389.14959625104996, 
389.14959625104996, 389.14959625104996, 389.14959625104996, 389.14959625104996],
"eval_len": [72, 72, 72, 72, 72, 72, 72, 72, 72, 72]}

 19%|█▉        | 189998/1000000 [3:48:00<11:59:54, 18.75it/s]global step 190000, trans_decision ep_re 383.28899288705304

{"global_step": 190000, "eval_re": [383.28899288705304, 383.28899288705304, 
383.28899288705304, 383.28899288705304, 383.28899288705304, 383.28899288705304, 
383.28899288705304, 383.28899288705304, 383.28899288705304, 383.28899288705304],
"eval_len": [71, 71, 71, 71, 71, 71, 71, 71, 71, 71]}

 20%|█▉        | 199998/1000000 [4:00:20<11:48:47, 18.81it/s]global step 200000, trans_decision ep_re 389.0865915119483

{"global_step": 200000, "eval_re": [389.0865915119483, 389.0865915119483, 
389.0865915119483, 389.0865915119483, 389.0865915119483, 389.0865915119483, 
389.0865915119483, 389.0865915119483, 389.0865915119483, 389.0865915119483], 
"eval_len": [72, 72, 72, 72, 72, 72, 72, 72, 72, 72]}

 21%|██        | 209998/1000000 [4:12:30<11:39:51, 18.81it/s]global step 210000, trans_decision ep_re 393.82771873978015

{"global_step": 210000, "eval_re": [393.82771873978015, 393.82771873978015, 
393.82771873978015, 393.82771873978015, 393.82771873978015, 393.82771873978015, 
393.82771873978015, 393.82771873978015, 393.82771873978015, 393.82771873978015],
"eval_len": [73, 73, 73, 73, 73, 73, 73, 73, 73, 73]}

 22%|██▏       | 219998/1000000 [4:24:50<11:35:30, 18.69it/s]global step 220000, trans_decision ep_re 389.58572885829875

{"global_step": 220000, "eval_re": [389.58572885829875, 389.58572885829875, 
389.58572885829875, 389.58572885829875, 389.58572885829875, 389.58572885829875, 
389.58572885829875, 389.58572885829875, 389.58572885829875, 389.58572885829875],
"eval_len": [72, 72, 72, 72, 72, 72, 72, 72, 72, 72]}

 23%|██▎       | 229998/1000000 [4:37:10<11:22:40, 18.80it/s]global step 230000, trans_decision ep_re 394.9824039111154

{"global_step": 230000, "eval_re": [394.9824039111154, 394.9824039111154, 
394.9824039111154, 394.9824039111154, 394.9824039111154, 394.9824039111154, 
394.9824039111154, 394.9824039111154, 394.9824039111154, 394.9824039111154], 
"eval_len": [73, 73, 73, 73, 73, 73, 73, 73, 73, 73]}

 24%|██▍       | 239998/1000000 [4:49:30<11:13:47, 18.80it/s]global step 240000, trans_decision ep_re 386.8230821379735

{"global_step": 240000, "eval_re": [386.8230821379735, 386.8230821379735, 
386.8230821379735, 386.8230821379735, 386.8230821379735, 386.8230821379735, 
386.8230821379735, 386.8230821379735, 386.8230821379735, 386.8230821379735], 
"eval_len": [72, 72, 72, 72, 72, 72, 72, 72, 72, 72]}

 25%|██▍       | 249998/1000000 [5:01:50<11:03:41, 18.83it/s]global step 250000, trans_decision ep_re 382.9703110187421

{"global_step": 250000, "eval_re": [382.97031101874217, 382.97031101874217, 
382.97031101874217, 382.97031101874217, 382.97031101874217, 382.97031101874217, 
382.97031101874217, 382.97031101874217, 382.97031101874217, 382.97031101874217],
"eval_len": [71, 71, 71, 71, 71, 71, 71, 71, 71, 71]}

 26%|██▌       | 259998/1000000 [5:14:00<10:51:05, 18.94it/s]global step 260000, trans_decision ep_re 410.29485550048673

{"global_step": 260000, "eval_re": [410.29485550048673, 410.29485550048673, 
410.29485550048673, 410.29485550048673, 410.29485550048673, 410.29485550048673, 
410.29485550048673, 410.29485550048673, 410.29485550048673, 410.29485550048673],
"eval_len": [75, 75, 75, 75, 75, 75, 75, 75, 75, 75]}

 27%|██▋       | 269998/1000000 [5:26:20<10:44:49, 18.87it/s]global step 270000, trans_decision ep_re 408.2944000511147

{"global_step": 270000, "eval_re": [408.2944000511147, 408.2944000511147, 
408.2944000511147, 408.2944000511147, 408.2944000511147, 408.2944000511147, 
408.2944000511147, 408.2944000511147, 408.2944000511147, 408.2944000511147], 
"eval_len": [75, 75, 75, 75, 75, 75, 75, 75, 75, 75]}

 28%|██▊       | 279998/1000000 [5:38:30<10:37:17, 18.83it/s]global step 280000, trans_decision ep_re 408.8539780977659

{"global_step": 280000, "eval_re": [408.8539780977659, 408.8539780977659, 
408.8539780977659, 408.8539780977659, 408.8539780977659, 408.8539780977659, 
408.8539780977659, 408.8539780977659, 408.8539780977659, 408.8539780977659], 
"eval_len": [75, 75, 75, 75, 75, 75, 75, 75, 75, 75]}

 29%|██▉       | 289998/1000000 [5:50:50<10:24:42, 18.94it/s]global step 290000, trans_decision ep_re 395.0320138128704

{"global_step": 290000, "eval_re": [395.0320138128704, 395.0320138128704, 
395.0320138128704, 395.0320138128704, 395.0320138128704, 395.0320138128704, 
395.0320138128704, 395.0320138128704, 395.0320138128704, 395.0320138128704], 
"eval_len": [73, 73, 73, 73, 73, 73, 73, 73, 73, 73]}

 30%|██▉       | 299998/1000000 [6:03:10<10:17:09, 18.90it/s]global step 300000, trans_decision ep_re 421.71075150501093

{"global_step": 300000, "eval_re": [421.7107515050109, 421.7107515050109, 
421.7107515050109, 421.7107515050109, 421.7107515050109, 421.7107515050109, 
421.7107515050109, 421.7107515050109, 421.7107515050109, 421.7107515050109], 
"eval_len": [77, 77, 77, 77, 77, 77, 77, 77, 77, 77]}

 31%|███       | 309998/1000000 [6:15:20<10:12:00, 18.79it/s]global step 310000, trans_decision ep_re 409.88194071421196

{"global_step": 310000, "eval_re": [409.88194071421196, 409.88194071421196, 
409.88194071421196, 409.88194071421196, 409.88194071421196, 409.88194071421196, 
409.88194071421196, 409.88194071421196, 409.88194071421196, 409.88194071421196],
"eval_len": [75, 75, 75, 75, 75, 75, 75, 75, 75, 75]}

 32%|███▏      | 319998/1000000 [6:27:40<10:04:52, 18.74it/s]global step 320000, trans_decision ep_re 422.2601769295892

{"global_step": 320000, "eval_re": [422.2601769295892, 422.2601769295892, 
422.2601769295892, 422.2601769295892, 422.2601769295892, 422.2601769295892, 
422.2601769295892, 422.2601769295892, 422.2601769295892, 422.2601769295892], 
"eval_len": [77, 77, 77, 77, 77, 77, 77, 77, 77, 77]}

 33%|███▎      | 329998/1000000 [6:39:50<9:59:05, 18.64it/s]global step 330000, trans_decision ep_re 438.9817799527731

{"global_step": 330000, "eval_re": [438.98177995277314, 438.98177995277314, 
438.98177995277314, 438.98177995277314, 438.98177995277314, 438.98177995277314, 
438.98177995277314, 438.98177995277314, 438.98177995277314, 438.98177995277314],
"eval_len": [80, 80, 80, 80, 80, 80, 80, 80, 80, 80]}

 34%|███▍      | 339998/1000000 [6:52:10<9:48:12, 18.70it/s]global step 340000, trans_decision ep_re 428.02167022238564

{"global_step": 340000, "eval_re": [428.02167022238564, 428.02167022238564, 
428.02167022238564, 428.02167022238564, 428.02167022238564, 428.02167022238564, 
428.02167022238564, 428.02167022238564, 428.02167022238564, 428.02167022238564],
"eval_len": [78, 78, 78, 78, 78, 78, 78, 78, 78, 78]}

 35%|███▍      | 349998/1000000 [7:04:30<9:35:26, 18.83it/s]global step 350000, trans_decision ep_re 428.3818970234826

{"global_step": 350000, "eval_re": [428.3818970234826, 428.3818970234826, 
428.3818970234826, 428.3818970234826, 428.3818970234826, 428.3818970234826, 
428.3818970234826, 428.3818970234826, 428.3818970234826, 428.3818970234826], 
"eval_len": [78, 78, 78, 78, 78, 78, 78, 78, 78, 78]}

 36%|███▌      | 359998/1000000 [7:16:50<9:27:15, 18.80it/s]global step 360000, trans_decision ep_re 442.03410680140803

{"global_step": 360000, "eval_re": [442.03410680140803, 442.03410680140803, 
442.03410680140803, 442.03410680140803, 442.03410680140803, 442.03410680140803, 
442.03410680140803, 442.03410680140803, 442.03410680140803, 442.03410680140803],
"eval_len": [81, 81, 81, 81, 81, 81, 81, 81, 81, 81]}

 37%|███▋      | 369998/1000000 [7:29:10<9:16:01, 18.88it/s]global step 370000, trans_decision ep_re 425.2525268573469

{"global_step": 370000, "eval_re": [425.2525268573469, 425.2525268573469, 
425.2525268573469, 425.2525268573469, 425.2525268573469, 425.2525268573469, 
425.2525268573469, 425.2525268573469, 425.2525268573469, 425.2525268573469], 
"eval_len": [78, 78, 78, 78, 78, 78, 78, 78, 78, 78]}

 38%|███▊      | 379998/1000000 [7:41:20<9:12:56, 18.69it/s]global step 380000, trans_decision ep_re 398.3180699277433

{"global_step": 380000, "eval_re": [398.3180699277433, 398.3180699277433, 
398.3180699277433, 398.3180699277433, 398.3180699277433, 398.3180699277433, 
398.3180699277433, 398.3180699277433, 398.3180699277433, 398.3180699277433], 
"eval_len": [73, 73, 73, 73, 73, 73, 73, 73, 73, 73]}

 39%|███▉      | 389998/1000000 [7:53:40<9:02:25, 18.74it/s]global step 390000, trans_decision ep_re 416.1994671444625

{"global_step": 390000, "eval_re": [416.1994671444625, 416.1994671444625, 
416.1994671444625, 416.1994671444625, 416.1994671444625, 416.1994671444625, 
416.1994671444625, 416.1994671444625, 416.1994671444625, 416.1994671444625], 
"eval_len": [76, 76, 76, 76, 76, 76, 76, 76, 76, 76]}

 40%|███▉      | 399998/1000000 [8:06:00<8:51:59, 18.80it/s]global step 400000, trans_decision ep_re 430.4552645697772

{"global_step": 400000, "eval_re": [430.4552645697772, 430.4552645697772, 
430.4552645697772, 430.4552645697772, 430.4552645697772, 430.4552645697772, 
430.4552645697772, 430.4552645697772, 430.4552645697772, 430.4552645697772], 
"eval_len": [78, 78, 78, 78, 78, 78, 78, 78, 78, 78]}

 41%|████      | 409998/1000000 [8:18:10<8:42:44, 18.81it/s]global step 410000, trans_decision ep_re 415.2218798083812

{"global_step": 410000, "eval_re": [415.2218798083812, 415.2218798083812, 
415.2218798083812, 415.2218798083812, 415.2218798083812, 415.2218798083812, 
415.2218798083812, 415.2218798083812, 415.2218798083812, 415.2218798083812], 
"eval_len": [76, 76, 76, 76, 76, 76, 76, 76, 76, 76]}

 42%|████▏     | 419998/1000000 [8:30:30<8:32:47, 18.85it/s]global step 420000, trans_decision ep_re 428.45843778414326

{"global_step": 420000, "eval_re": [428.4584377841433, 428.4584377841433, 
428.4584377841433, 428.4584377841433, 428.4584377841433, 428.4584377841433, 
428.4584377841433, 428.4584377841433, 428.4584377841433, 428.4584377841433], 
"eval_len": [78, 78, 78, 78, 78, 78, 78, 78, 78, 78]}

 43%|████▎     | 429998/1000000 [8:42:50<8:27:25, 18.72it/s]global step 430000, trans_decision ep_re 422.8842454302718

{"global_step": 430000, "eval_re": [422.8842454302718, 422.8842454302718, 
422.8842454302718, 422.8842454302718, 422.8842454302718, 422.8842454302718, 
422.8842454302718, 422.8842454302718, 422.8842454302718, 422.8842454302718], 
"eval_len": [77, 77, 77, 77, 77, 77, 77, 77, 77, 77]}

 44%|████▍     | 439998/1000000 [8:55:00<8:17:40, 18.75it/s]global step 440000, trans_decision ep_re 415.93072335368606

{"global_step": 440000, "eval_re": [415.93072335368606, 415.93072335368606, 
415.93072335368606, 415.93072335368606, 415.93072335368606, 415.93072335368606, 
415.93072335368606, 415.93072335368606, 415.93072335368606, 415.93072335368606],
"eval_len": [76, 76, 76, 76, 76, 76, 76, 76, 76, 76]}

 45%|████▍     | 449998/1000000 [9:07:20<8:07:37, 18.80it/s]global step 450000, trans_decision ep_re 421.92071800243247

{"global_step": 450000, "eval_re": [421.9207180024325, 421.9207180024325, 
421.9207180024325, 421.9207180024325, 421.9207180024325, 421.9207180024325, 
421.9207180024325, 421.9207180024325, 421.9207180024325, 421.9207180024325], 
"eval_len": [77, 77, 77, 77, 77, 77, 77, 77, 77, 77]}

 46%|████▌     | 459998/1000000 [9:19:40<7:58:33, 18.81it/s]global step 460000, trans_decision ep_re 423.46602519719943

{"global_step": 460000, "eval_re": [423.46602519719943, 423.46602519719943, 
423.46602519719943, 423.46602519719943, 423.46602519719943, 423.46602519719943, 
423.46602519719943, 423.46602519719943, 423.46602519719943, 423.46602519719943],
"eval_len": [77, 77, 77, 77, 77, 77, 77, 77, 77, 77]}

 47%|████▋     | 469998/1000000 [9:32:00<7:49:21, 18.82it/s]global step 470000, trans_decision ep_re 414.530944369383

{"global_step": 470000, "eval_re": [414.53094436938295, 414.53094436938295, 
414.53094436938295, 414.53094436938295, 414.53094436938295, 414.53094436938295, 
414.53094436938295, 414.53094436938295, 414.53094436938295, 414.53094436938295],
"eval_len": [76, 76, 76, 76, 76, 76, 76, 76, 76, 76]}

 48%|████▊     | 479998/1000000 [9:44:10<7:37:44, 18.93it/s]global step 480000, trans_decision ep_re 425.7054673653014

{"global_step": 480000, "eval_re": [425.7054673653014, 425.7054673653014, 
425.7054673653014, 425.7054673653014, 425.7054673653014, 425.7054673653014, 
425.7054673653014, 425.7054673653014, 425.7054673653014, 425.7054673653014], 
"eval_len": [77, 77, 77, 77, 77, 77, 77, 77, 77, 77]}

 49%|████▉     | 489998/1000000 [9:56:30<7:32:38, 18.78it/s]global step 490000, trans_decision ep_re 429.9756676175256

{"global_step": 490000, "eval_re": [429.9756676175256, 429.9756676175256, 
429.9756676175256, 429.9756676175256, 429.9756676175256, 429.9756676175256, 
429.9756676175256, 429.9756676175256, 429.9756676175256, 429.9756676175256], 
"eval_len": [78, 78, 78, 78, 78, 78, 78, 78, 78, 78]}

 50%|████▉     | 499998/1000000 [10:08:50<7:21:51, 18.86it/s]global step 500000, trans_decision ep_re 417.82264365199853

{"global_step": 500000, "eval_re": [417.82264365199853, 417.82264365199853, 
417.82264365199853, 417.82264365199853, 417.82264365199853, 417.82264365199853, 
417.82264365199853, 417.82264365199853, 417.82264365199853, 417.82264365199853],
"eval_len": [76, 76, 76, 76, 76, 76, 76, 76, 76, 76]}

 51%|█████     | 509998/1000000 [10:21:00<7:10:09, 18.99it/s]global step 510000, trans_decision ep_re 406.5955796616759

{"global_step": 510000, "eval_re": [406.5955796616759, 406.5955796616759, 
406.5955796616759, 406.5955796616759, 406.5955796616759, 406.5955796616759, 
406.5955796616759, 406.5955796616759, 406.5955796616759, 406.5955796616759], 
"eval_len": [74, 74, 74, 74, 74, 74, 74, 74, 74, 74]}

 52%|█████▏    | 519998/1000000 [10:33:20<7:07:14, 18.72it/s]global step 520000, trans_decision ep_re 263.7820754451619

{"global_step": 520000, "eval_re": [263.782075445162, 263.782075445162, 
263.782075445162, 263.782075445162, 263.782075445162, 263.782075445162, 
263.782075445162, 263.782075445162, 263.782075445162, 263.782075445162], 
"eval_len": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}

 53%|█████▎    | 529998/1000000 [10:45:40<6:56:43, 18.80it/s]global step 530000, trans_decision ep_re 422.89477184004

{"global_step": 530000, "eval_re": [422.89477184004, 422.89477184004, 
422.89477184004, 422.89477184004, 422.89477184004, 422.89477184004, 
422.89477184004, 422.89477184004, 422.89477184004, 422.89477184004], "eval_len":
[77, 77, 77, 77, 77, 77, 77, 77, 77, 77]}

 54%|█████▍    | 539998/1000000 [10:57:50<6:47:43, 18.80it/s]global step 540000, trans_decision ep_re 401.3108738559671

{"global_step": 540000, "eval_re": [401.3108738559671, 401.3108738559671, 
401.3108738559671, 401.3108738559671, 401.3108738559671, 401.3108738559671, 
401.3108738559671, 401.3108738559671, 401.3108738559671, 401.3108738559671], 
"eval_len": [73, 73, 73, 73, 73, 73, 73, 73, 73, 73]}

 55%|█████▍    | 549998/1000000 [11:10:10<6:38:50, 18.80it/s]global step 550000, trans_decision ep_re 424.78318430211664

{"global_step": 550000, "eval_re": [424.7831843021167, 424.7831843021167, 
424.7831843021167, 424.7831843021167, 424.7831843021167, 424.7831843021167, 
424.7831843021167, 424.7831843021167, 424.7831843021167, 424.7831843021167], 
"eval_len": [77, 77, 77, 77, 77, 77, 77, 77, 77, 77]}

 56%|█████▌    | 559998/1000000 [11:22:30<6:26:42, 18.96it/s]global step 560000, trans_decision ep_re 394.17121273626196

{"global_step": 560000, "eval_re": [394.17121273626196, 394.17121273626196, 
394.17121273626196, 394.17121273626196, 394.17121273626196, 394.17121273626196, 
394.17121273626196, 394.17121273626196, 394.17121273626196, 394.17121273626196],
"eval_len": [72, 72, 72, 72, 72, 72, 72, 72, 72, 72]}

 57%|█████▋    | 569998/1000000 [11:34:40<6:19:45, 18.87it/s]global step 570000, trans_decision ep_re 417.2137634487326

{"global_step": 570000, "eval_re": [417.2137634487326, 417.2137634487326, 
417.2137634487326, 417.2137634487326, 417.2137634487326, 417.2137634487326, 
417.2137634487326, 417.2137634487326, 417.2137634487326, 417.2137634487326], 
"eval_len": [76, 76, 76, 76, 76, 76, 76, 76, 76, 76]}

 58%|█████▊    | 579998/1000000 [11:47:00<6:09:44, 18.93it/s]global step 580000, trans_decision ep_re 263.2201791999934

{"global_step": 580000, "eval_re": [263.2201791999935, 263.2201791999935, 
263.2201791999935, 263.2201791999935, 263.2201791999935, 263.2201791999935, 
263.2201791999935, 263.2201791999935, 263.2201791999935, 263.2201791999935], 
"eval_len": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}

 59%|█████▉    | 589998/1000000 [11:59:10<6:03:38, 18.79it/s]global step 590000, trans_decision ep_re 274.7901769960548

{"global_step": 590000, "eval_re": [274.79017699605475, 274.79017699605475, 
274.79017699605475, 274.79017699605475, 274.79017699605475, 274.79017699605475, 
274.79017699605475, 274.79017699605475, 274.79017699605475, 274.79017699605475],
"eval_len": [52, 52, 52, 52, 52, 52, 52, 52, 52, 52]}

 60%|█████▉    | 599998/1000000 [12:11:30<5:54:51, 18.79it/s]global step 600000, trans_decision ep_re 404.2258133587296

{"global_step": 600000, "eval_re": [404.22581335872957, 404.22581335872957, 
404.22581335872957, 404.22581335872957, 404.22581335872957, 404.22581335872957, 
404.22581335872957, 404.22581335872957, 404.22581335872957, 404.22581335872957],
"eval_len": [74, 74, 74, 74, 74, 74, 74, 74, 74, 74]}

 61%|██████    | 609998/1000000 [12:23:40<5:44:56, 18.84it/s]global step 610000, trans_decision ep_re 269.1530082609398

{"global_step": 610000, "eval_re": [269.1530082609398, 269.1530082609398, 
269.1530082609398, 269.1530082609398, 269.1530082609398, 269.1530082609398, 
269.1530082609398, 269.1530082609398, 269.1530082609398, 269.1530082609398], 
"eval_len": [51, 51, 51, 51, 51, 51, 51, 51, 51, 51]}

 62%|██████▏   | 619998/1000000 [12:36:00<5:36:04, 18.84it/s]global step 620000, trans_decision ep_re 269.2367239164659

{"global_step": 620000, "eval_re": [269.2367239164659, 269.2367239164659, 
269.2367239164659, 269.2367239164659, 269.2367239164659, 269.2367239164659, 
269.2367239164659, 269.2367239164659, 269.2367239164659, 269.2367239164659], 
"eval_len": [51, 51, 51, 51, 51, 51, 51, 51, 51, 51]}

 63%|██████▎   | 629998/1000000 [12:48:10<5:27:59, 18.80it/s]global step 630000, trans_decision ep_re 402.7990546090226

{"global_step": 630000, "eval_re": [402.79905460902256, 402.79905460902256, 
402.79905460902256, 402.79905460902256, 402.79905460902256, 402.79905460902256, 
402.79905460902256, 402.79905460902256, 402.79905460902256, 402.79905460902256],
"eval_len": [74, 74, 74, 74, 74, 74, 74, 74, 74, 74]}

 64%|██████▍   | 639998/1000000 [13:00:30<5:17:51, 18.88it/s]global step 640000, trans_decision ep_re 438.17361120914137

{"global_step": 640000, "eval_re": [438.1736112091414, 438.1736112091414, 
438.1736112091414, 438.1736112091414, 438.1736112091414, 438.1736112091414, 
438.1736112091414, 438.1736112091414, 438.1736112091414, 438.1736112091414], 
"eval_len": [79, 79, 79, 79, 79, 79, 79, 79, 79, 79]}

 65%|██████▍   | 649998/1000000 [13:12:40<5:09:50, 18.83it/s]global step 650000, trans_decision ep_re 258.19259620255843

{"global_step": 650000, "eval_re": [258.19259620255843, 258.19259620255843, 
258.19259620255843, 258.19259620255843, 258.19259620255843, 258.19259620255843, 
258.19259620255843, 258.19259620255843, 258.19259620255843, 258.19259620255843],
"eval_len": [49, 49, 49, 49, 49, 49, 49, 49, 49, 49]}

 66%|██████▌   | 659998/1000000 [13:25:00<5:01:38, 18.79it/s]global step 660000, trans_decision ep_re 263.77784377064165

{"global_step": 660000, "eval_re": [263.7778437706417, 263.7778437706417, 
263.7778437706417, 263.7778437706417, 263.7778437706417, 263.7778437706417, 
263.7778437706417, 263.7778437706417, 263.7778437706417, 263.7778437706417], 
"eval_len": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}

 67%|██████▋   | 669998/1000000 [13:37:20<4:52:32, 18.80it/s]global step 670000, trans_decision ep_re 269.8614358709752

{"global_step": 670000, "eval_re": [269.8614358709752, 269.8614358709752, 
269.8614358709752, 269.8614358709752, 269.8614358709752, 269.8614358709752, 
269.8614358709752, 269.8614358709752, 269.8614358709752, 269.8614358709752], 
"eval_len": [51, 51, 51, 51, 51, 51, 51, 51, 51, 51]}

 68%|██████▊   | 679998/1000000 [13:49:30<4:42:57, 18.85it/s]global step 680000, trans_decision ep_re 258.6871447256197

{"global_step": 680000, "eval_re": [258.68714472561965, 258.68714472561965, 
258.68714472561965, 258.68714472561965, 258.68714472561965, 258.68714472561965, 
258.68714472561965, 258.68714472561965, 258.68714472561965, 258.68714472561965],
"eval_len": [49, 49, 49, 49, 49, 49, 49, 49, 49, 49]}

 69%|██████▉   | 689998/1000000 [14:01:50<4:34:26, 18.83it/s]global step 690000, trans_decision ep_re 275.2872731954183

{"global_step": 690000, "eval_re": [275.28727319541827, 275.28727319541827, 
275.28727319541827, 275.28727319541827, 275.28727319541827, 275.28727319541827, 
275.28727319541827, 275.28727319541827, 275.28727319541827, 275.28727319541827],
"eval_len": [52, 52, 52, 52, 52, 52, 52, 52, 52, 52]}

 70%|██████▉   | 699998/1000000 [14:14:00<4:26:59, 18.73it/s]global step 700000, trans_decision ep_re 410.373173487713

{"global_step": 700000, "eval_re": [410.3731734877131, 410.3731734877131, 
410.3731734877131, 410.3731734877131, 410.3731734877131, 410.3731734877131, 
410.3731734877131, 410.3731734877131, 410.3731734877131, 410.3731734877131], 
"eval_len": [74, 74, 74, 74, 74, 74, 74, 74, 74, 74]}

 71%|███████   | 709998/1000000 [14:26:21<4:18:01, 18.73it/s]global step 710000, trans_decision ep_re 269.74089581424835

{"global_step": 710000, "eval_re": [269.74089581424835, 269.74089581424835, 
269.74089581424835, 269.74089581424835, 269.74089581424835, 269.74089581424835, 
269.74089581424835, 269.74089581424835, 269.74089581424835, 269.74089581424835],
"eval_len": [51, 51, 51, 51, 51, 51, 51, 51, 51, 51]}

 72%|███████▏  | 719998/1000000 [14:38:41<4:08:29, 18.78it/s]global step 720000, trans_decision ep_re 275.62527247760175

{"global_step": 720000, "eval_re": [275.62527247760175, 275.62527247760175, 
275.62527247760175, 275.62527247760175, 275.62527247760175, 275.62527247760175, 
275.62527247760175, 275.62527247760175, 275.62527247760175, 275.62527247760175],
"eval_len": [52, 52, 52, 52, 52, 52, 52, 52, 52, 52]}

 73%|███████▎  | 729998/1000000 [14:50:51<3:58:17, 18.88it/s]global step 730000, trans_decision ep_re 269.7781892744048

{"global_step": 730000, "eval_re": [269.77818927440484, 269.77818927440484, 
269.77818927440484, 269.77818927440484, 269.77818927440484, 269.77818927440484, 
269.77818927440484, 269.77818927440484, 269.77818927440484, 269.77818927440484],
"eval_len": [51, 51, 51, 51, 51, 51, 51, 51, 51, 51]}

 74%|███████▍  | 739998/1000000 [15:03:11<3:51:21, 18.73it/s]global step 740000, trans_decision ep_re 247.6432628650544

{"global_step": 740000, "eval_re": [247.64326286505434, 247.64326286505434, 
247.64326286505434, 247.64326286505434, 247.64326286505434, 247.64326286505434, 
247.64326286505434, 247.64326286505434, 247.64326286505434, 247.64326286505434],
"eval_len": [47, 47, 47, 47, 47, 47, 47, 47, 47, 47]}

 75%|███████▍  | 749998/1000000 [15:15:21<3:40:47, 18.87it/s]global step 750000, trans_decision ep_re 236.43380102895216

{"global_step": 750000, "eval_re": [236.4338010289522, 236.4338010289522, 
236.4338010289522, 236.4338010289522, 236.4338010289522, 236.4338010289522, 
236.4338010289522, 236.4338010289522, 236.4338010289522, 236.4338010289522], 
"eval_len": [45, 45, 45, 45, 45, 45, 45, 45, 45, 45]}

 76%|███████▌  | 759998/1000000 [15:27:41<3:32:52, 18.79it/s]global step 760000, trans_decision ep_re 280.8511221749364

{"global_step": 760000, "eval_re": [280.8511221749364, 280.8511221749364, 
280.8511221749364, 280.8511221749364, 280.8511221749364, 280.8511221749364, 
280.8511221749364, 280.8511221749364, 280.8511221749364, 280.8511221749364], 
"eval_len": [53, 53, 53, 53, 53, 53, 53, 53, 53, 53]}

 77%|███████▋  | 769998/1000000 [15:39:51<3:24:32, 18.74it/s]global step 770000, trans_decision ep_re 269.69388578663705

{"global_step": 770000, "eval_re": [269.69388578663705, 269.69388578663705, 
269.69388578663705, 269.69388578663705, 269.69388578663705, 269.69388578663705, 
269.69388578663705, 269.69388578663705, 269.69388578663705, 269.69388578663705],
"eval_len": [51, 51, 51, 51, 51, 51, 51, 51, 51, 51]}

 78%|███████▊  | 779998/1000000 [15:52:11<3:14:43, 18.83it/s]global step 780000, trans_decision ep_re 280.4707776262621

{"global_step": 780000, "eval_re": [280.4707776262621, 280.4707776262621, 
280.4707776262621, 280.4707776262621, 280.4707776262621, 280.4707776262621, 
280.4707776262621, 280.4707776262621, 280.4707776262621, 280.4707776262621], 
"eval_len": [53, 53, 53, 53, 53, 53, 53, 53, 53, 53]}

 79%|███████▉  | 789998/1000000 [16:04:21<3:05:53, 18.83it/s]global step 790000, trans_decision ep_re 253.16963572962294

{"global_step": 790000, "eval_re": [253.16963572962297, 253.16963572962297, 
253.16963572962297, 253.16963572962297, 253.16963572962297, 253.16963572962297, 
253.16963572962297, 253.16963572962297, 253.16963572962297, 253.16963572962297],
"eval_len": [48, 48, 48, 48, 48, 48, 48, 48, 48, 48]}

 80%|███████▉  | 799998/1000000 [16:16:41<2:57:58, 18.73it/s]global step 800000, trans_decision ep_re 252.97654888729397

{"global_step": 800000, "eval_re": [252.97654888729397, 252.97654888729397, 
252.97654888729397, 252.97654888729397, 252.97654888729397, 252.97654888729397, 
252.97654888729397, 252.97654888729397, 252.97654888729397, 252.97654888729397],
"eval_len": [48, 48, 48, 48, 48, 48, 48, 48, 48, 48]}

 81%|████████  | 809998/1000000 [16:29:01<2:48:56, 18.74it/s]global step 810000, trans_decision ep_re 247.47908348987548

{"global_step": 810000, "eval_re": [247.47908348987548, 247.47908348987548, 
247.47908348987548, 247.47908348987548, 247.47908348987548, 247.47908348987548, 
247.47908348987548, 247.47908348987548, 247.47908348987548, 247.47908348987548],
"eval_len": [47, 47, 47, 47, 47, 47, 47, 47, 47, 47]}

 82%|████████▏ | 819998/1000000 [16:41:11<2:39:00, 18.87it/s]global step 820000, trans_decision ep_re 242.00352116179744

{"global_step": 820000, "eval_re": [242.00352116179744, 242.00352116179744, 
242.00352116179744, 242.00352116179744, 242.00352116179744, 242.00352116179744, 
242.00352116179744, 242.00352116179744, 242.00352116179744, 242.00352116179744],
"eval_len": [46, 46, 46, 46, 46, 46, 46, 46, 46, 46]}

 83%|████████▎ | 829998/1000000 [16:53:41<2:30:54, 18.78it/s]global step 830000, trans_decision ep_re 242.24891342882938

{"global_step": 830000, "eval_re": [242.24891342882938, 242.24891342882938, 
242.24891342882938, 242.24891342882938, 242.24891342882938, 242.24891342882938, 
242.24891342882938, 242.24891342882938, 242.24891342882938, 242.24891342882938],
"eval_len": [46, 46, 46, 46, 46, 46, 46, 46, 46, 46]}

 84%|████████▍ | 839998/1000000 [17:05:51<2:23:21, 18.60it/s]global step 840000, trans_decision ep_re 252.9397009231087

{"global_step": 840000, "eval_re": [252.9397009231087, 252.9397009231087, 
252.9397009231087, 252.9397009231087, 252.9397009231087, 252.9397009231087, 
252.9397009231087, 252.9397009231087, 252.9397009231087, 252.9397009231087], 
"eval_len": [48, 48, 48, 48, 48, 48, 48, 48, 48, 48]}

 85%|████████▍ | 849998/1000000 [17:18:21<2:13:34, 18.72it/s]global step 850000, trans_decision ep_re 258.8620898874559

{"global_step": 850000, "eval_re": [258.86208988745585, 258.86208988745585, 
258.86208988745585, 258.86208988745585, 258.86208988745585, 258.86208988745585, 
258.86208988745585, 258.86208988745585, 258.86208988745585, 258.86208988745585],
"eval_len": [49, 49, 49, 49, 49, 49, 49, 49, 49, 49]}

 86%|████████▌ | 859998/1000000 [17:30:41<2:04:42, 18.71it/s]global step 860000, trans_decision ep_re 258.3481386578518

{"global_step": 860000, "eval_re": [258.34813865785173, 258.34813865785173, 
258.34813865785173, 258.34813865785173, 258.34813865785173, 258.34813865785173, 
258.34813865785173, 258.34813865785173, 258.34813865785173, 258.34813865785173],
"eval_len": [49, 49, 49, 49, 49, 49, 49, 49, 49, 49]}

 87%|████████▋ | 869998/1000000 [17:42:51<1:55:01, 18.84it/s]global step 870000, trans_decision ep_re 264.40147632025173

{"global_step": 870000, "eval_re": [264.40147632025173, 264.40147632025173, 
264.40147632025173, 264.40147632025173, 264.40147632025173, 264.40147632025173, 
264.40147632025173, 264.40147632025173, 264.40147632025173, 264.40147632025173],
"eval_len": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}

 88%|████████▊ | 879998/1000000 [17:55:11<1:46:06, 18.85it/s]global step 880000, trans_decision ep_re 242.36542727292982

{"global_step": 880000, "eval_re": [242.3654272729298, 242.3654272729298, 
242.3654272729298, 242.3654272729298, 242.3654272729298, 242.3654272729298, 
242.3654272729298, 242.3654272729298, 242.3654272729298, 242.3654272729298], 
"eval_len": [46, 46, 46, 46, 46, 46, 46, 46, 46, 46]}

 89%|████████▉ | 889998/1000000 [18:07:21<1:38:26, 18.62it/s]global step 890000, trans_decision ep_re 241.7444305985834

{"global_step": 890000, "eval_re": [241.74443059858342, 241.74443059858342, 
241.74443059858342, 241.74443059858342, 241.74443059858342, 241.74443059858342, 
241.74443059858342, 241.74443059858342, 241.74443059858342, 241.74443059858342],
"eval_len": [46, 46, 46, 46, 46, 46, 46, 46, 46, 46]}

 90%|████████▉ | 899998/1000000 [18:19:51<1:29:38, 18.59it/s]global step 900000, trans_decision ep_re 253.3741323208938

{"global_step": 900000, "eval_re": [253.3741323208938, 253.3741323208938, 
253.3741323208938, 253.3741323208938, 253.3741323208938, 253.3741323208938, 
253.3741323208938, 253.3741323208938, 253.3741323208938, 253.3741323208938], 
"eval_len": [48, 48, 48, 48, 48, 48, 48, 48, 48, 48]}

 91%|█████████ | 909998/1000000 [18:32:11<1:21:02, 18.51it/s]global step 910000, trans_decision ep_re 247.64162801648345

{"global_step": 910000, "eval_re": [247.64162801648342, 247.64162801648342, 
247.64162801648342, 247.64162801648342, 247.64162801648342, 247.64162801648342, 
247.64162801648342, 247.64162801648342, 247.64162801648342, 247.64162801648342],
"eval_len": [47, 47, 47, 47, 47, 47, 47, 47, 47, 47]}

 92%|█████████▏| 919998/1000000 [18:44:31<1:12:15, 18.45it/s]global step 920000, trans_decision ep_re 236.8950503274945

{"global_step": 920000, "eval_re": [236.89505032749446, 236.89505032749446, 
236.89505032749446, 236.89505032749446, 236.89505032749446, 236.89505032749446, 
236.89505032749446, 236.89505032749446, 236.89505032749446, 236.89505032749446],
"eval_len": [45, 45, 45, 45, 45, 45, 45, 45, 45, 45]}

 93%|█████████▎| 929998/1000000 [18:57:01<1:02:20, 18.71it/s]global step 930000, trans_decision ep_re 248.06121565461854

{"global_step": 930000, "eval_re": [248.06121565461854, 248.06121565461854, 
248.06121565461854, 248.06121565461854, 248.06121565461854, 248.06121565461854, 
248.06121565461854, 248.06121565461854, 248.06121565461854, 248.06121565461854],
"eval_len": [47, 47, 47, 47, 47, 47, 47, 47, 47, 47]}

 94%|█████████▍| 939998/1000000 [19:09:21<53:47, 18.59it/s]global step 940000, trans_decision ep_re 231.27459476439836

{"global_step": 940000, "eval_re": [231.27459476439836, 231.27459476439836, 
231.27459476439836, 231.27459476439836, 231.27459476439836, 231.27459476439836, 
231.27459476439836, 231.27459476439836, 231.27459476439836, 231.27459476439836],
"eval_len": [44, 44, 44, 44, 44, 44, 44, 44, 44, 44]}

 95%|█████████▍| 949998/1000000 [19:21:41<45:06, 18.48it/s]global step 950000, trans_decision ep_re 231.38227460792592

{"global_step": 950000, "eval_re": [231.38227460792592, 231.38227460792592, 
231.38227460792592, 231.38227460792592, 231.38227460792592, 231.38227460792592, 
231.38227460792592, 231.38227460792592, 231.38227460792592, 231.38227460792592],
"eval_len": [44, 44, 44, 44, 44, 44, 44, 44, 44, 44]}

 96%|█████████▌| 959998/1000000 [19:34:01<36:01, 18.51it/s]global step 960000, trans_decision ep_re 253.52310943941384

{"global_step": 960000, "eval_re": [253.52310943941384, 253.52310943941384, 
253.52310943941384, 253.52310943941384, 253.52310943941384, 253.52310943941384, 
253.52310943941384, 253.52310943941384, 253.52310943941384, 253.52310943941384],
"eval_len": [48, 48, 48, 48, 48, 48, 48, 48, 48, 48]}

 97%|█████████▋| 969998/1000000 [19:46:31<27:09, 18.41it/s]global step 970000, trans_decision ep_re 236.5594338356522

{"global_step": 970000, "eval_re": [236.55943383565216, 236.55943383565216, 
236.55943383565216, 236.55943383565216, 236.55943383565216, 236.55943383565216, 
236.55943383565216, 236.55943383565216, 236.55943383565216, 236.55943383565216],
"eval_len": [45, 45, 45, 45, 45, 45, 45, 45, 45, 45]}

 98%|█████████▊| 979998/1000000 [19:58:51<18:12, 18.31it/s]global step 980000, trans_decision ep_re 236.58555618877327

{"global_step": 980000, "eval_re": [236.58555618877324, 236.58555618877324, 
236.58555618877324, 236.58555618877324, 236.58555618877324, 236.58555618877324, 
236.58555618877324, 236.58555618877324, 236.58555618877324, 236.58555618877324],
"eval_len": [45, 45, 45, 45, 45, 45, 45, 45, 45, 45]}

 99%|█████████▉| 989998/1000000 [20:11:21<09:01, 18.49it/s]global step 990000, trans_decision ep_re 247.9594887930824

{"global_step": 990000, "eval_re": [247.9594887930824, 247.9594887930824, 
247.9594887930824, 247.9594887930824, 247.9594887930824, 247.9594887930824, 
247.9594887930824, 247.9594887930824, 247.9594887930824, 247.9594887930824], 
"eval_len": [47, 47, 47, 47, 47, 47, 47, 47, 47, 47]}

100%|█████████▉| 999998/1000000 [20:23:41<00:00, 18.47it/s]global step 1000000, trans_decision ep_re 242.87616704669395

{"global_step": 1000000, "eval_re": [242.87616704669398, 242.87616704669398, 
242.87616704669398, 242.87616704669398, 242.87616704669398, 242.87616704669398, 
242.87616704669398, 242.87616704669398, 242.87616704669398, 242.87616704669398],
"eval_len": [46, 46, 46, 46, 46, 46, 46, 46, 46, 46]}

100%|██████████| 1000000/1000000 [20:23:47<00:00, 13.62it/s]
