
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.0
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9997/1000000 [03:55<9:40:07, 28.44it/s]global step 10000, trans_decision ep_re 218.86300263427628

{"global_step": 10000, "eval_re": [218.86300263427628, 218.86300263427628, 
218.86300263427628, 218.86300263427628, 218.86300263427628, 218.86300263427628, 
218.86300263427628, 218.86300263427628, 218.86300263427628, 218.86300263427628],
"eval_len": [45, 45, 45, 45, 45, 45, 45, 45, 45, 45]}

  2%|▏         | 19997/1000000 [12:00<9:39:22, 28.19it/s]global step 20000, trans_decision ep_re 375.39406233944885

{"global_step": 20000, "eval_re": [375.3940623394489, 375.3940623394489, 
375.3940623394489, 375.3940623394489, 375.3940623394489, 375.3940623394489, 
375.3940623394489, 375.3940623394489, 375.3940623394489, 375.3940623394489], 
"eval_len": [69, 69, 69, 69, 69, 69, 69, 69, 69, 69]}

  3%|▎         | 29997/1000000 [19:50<9:36:43, 28.03it/s]global step 30000, trans_decision ep_re 313.6726468669973

{"global_step": 30000, "eval_re": [313.67264686699735, 313.67264686699735, 
313.67264686699735, 313.67264686699735, 313.67264686699735, 313.67264686699735, 
313.67264686699735, 313.67264686699735, 313.67264686699735, 313.67264686699735],
"eval_len": [60, 60, 60, 60, 60, 60, 60, 60, 60, 60]}

  4%|▍         | 39998/1000000 [27:31<9:22:50, 28.43it/s]global step 40000, trans_decision ep_re 350.0456522636024

{"global_step": 40000, "eval_re": [350.04565226360245, 350.04565226360245, 
350.04565226360245, 350.04565226360245, 350.04565226360245, 350.04565226360245, 
350.04565226360245, 350.04565226360245, 350.04565226360245, 350.04565226360245],
"eval_len": [66, 66, 66, 66, 66, 66, 66, 66, 66, 66]}

  5%|▍         | 49998/1000000 [35:24<9:17:18, 28.41it/s]global step 50000, trans_decision ep_re 324.12343193554386

{"global_step": 50000, "eval_re": [324.1234319355438, 324.1234319355438, 
324.1234319355438, 324.1234319355438, 324.1234319355438, 324.1234319355438, 
324.1234319355438, 324.1234319355438, 324.1234319355438, 324.1234319355438], 
"eval_len": [62, 62, 62, 62, 62, 62, 62, 62, 62, 62]}

  6%|▌         | 59999/1000000 [43:16<9:19:57, 27.98it/s]global step 60000, trans_decision ep_re 224.91938766797415

{"global_step": 60000, "eval_re": [224.91938766797415, 224.91938766797415, 
224.91938766797415, 224.91938766797415, 224.91938766797415, 224.91938766797415, 
224.91938766797415, 224.91938766797415, 224.91938766797415, 224.91938766797415],
"eval_len": [46, 46, 46, 46, 46, 46, 46, 46, 46, 46]}

  7%|▋         | 69997/1000000 [51:20<9:13:10, 28.02it/s]global step 70000, trans_decision ep_re 309.6538268641047

{"global_step": 70000, "eval_re": [309.6538268641047, 309.6538268641047, 
309.6538268641047, 309.6538268641047, 309.6538268641047, 309.6538268641047, 
309.6538268641047, 309.6538268641047, 309.6538268641047, 309.6538268641047], 
"eval_len": [60, 60, 60, 60, 60, 60, 60, 60, 60, 60]}

  8%|▊         | 79998/1000000 [59:10<9:02:11, 28.28it/s]global step 80000, trans_decision ep_re 348.0978613694877

{"global_step": 80000, "eval_re": [348.09786136948765, 348.09786136948765, 
348.09786136948765, 348.09786136948765, 348.09786136948765, 348.09786136948765, 
348.09786136948765, 348.09786136948765, 348.09786136948765, 348.09786136948765],
"eval_len": [67, 67, 67, 67, 67, 67, 67, 67, 67, 67]}

  9%|▉         | 89999/1000000 [1:06:51<8:56:02, 28.29it/s]global step 90000, trans_decision ep_re 348.94137527100355

{"global_step": 90000, "eval_re": [348.9413752710036, 348.9413752710036, 
348.9413752710036, 348.9413752710036, 348.9413752710036, 348.9413752710036, 
348.9413752710036, 348.9413752710036, 348.9413752710036, 348.9413752710036], 
"eval_len": [67, 67, 67, 67, 67, 67, 67, 67, 67, 67]}

 10%|▉         | 99998/1000000 [1:14:44<8:49:50, 28.31it/s]global step 100000, trans_decision ep_re 346.6494179364696

{"global_step": 100000, "eval_re": [346.6494179364696, 346.6494179364696, 
346.6494179364696, 346.6494179364696, 346.6494179364696, 346.6494179364696, 
346.6494179364696, 346.6494179364696, 346.6494179364696, 346.6494179364696], 
"eval_len": [67, 67, 67, 67, 67, 67, 67, 67, 67, 67]}

 11%|█         | 109997/1000000 [1:22:50<8:52:42, 27.85it/s]global step 110000, trans_decision ep_re 305.66632074939616

{"global_step": 110000, "eval_re": [305.66632074939616, 305.66632074939616, 
305.66632074939616, 305.66632074939616, 305.66632074939616, 305.66632074939616, 
305.66632074939616, 305.66632074939616, 305.66632074939616, 305.66632074939616],
"eval_len": [60, 60, 60, 60, 60, 60, 60, 60, 60, 60]}

 12%|█▏        | 119997/1000000 [1:30:30<8:45:40, 27.90it/s]global step 120000, trans_decision ep_re 329.8960134461941

{"global_step": 120000, "eval_re": [329.8960134461941, 329.8960134461941, 
329.8960134461941, 329.8960134461941, 329.8960134461941, 329.8960134461941, 
329.8960134461941, 329.8960134461941, 329.8960134461941, 329.8960134461941], 
"eval_len": [64, 64, 64, 64, 64, 64, 64, 64, 64, 64]}

 13%|█▎        | 129998/1000000 [1:38:23<8:25:41, 28.67it/s]global step 130000, trans_decision ep_re 220.94438748410602

{"global_step": 130000, "eval_re": [220.94438748410602, 220.94438748410602, 
220.94438748410602, 220.94438748410602, 220.94438748410602, 220.94438748410602, 
220.94438748410602, 220.94438748410602, 220.94438748410602, 220.94438748410602],
"eval_len": [45, 45, 45, 45, 45, 45, 45, 45, 45, 45]}

 14%|█▍        | 139998/1000000 [1:46:14<8:18:23, 28.76it/s]global step 140000, trans_decision ep_re 332.7737798295317

{"global_step": 140000, "eval_re": [332.7737798295317, 332.7737798295317, 
332.7737798295317, 332.7737798295317, 332.7737798295317, 332.7737798295317, 
332.7737798295317, 332.7737798295317, 332.7737798295317, 332.7737798295317], 
"eval_len": [64, 64, 64, 64, 64, 64, 64, 64, 64, 64]}

 15%|█▍        | 149999/1000000 [1:54:05<8:23:01, 28.16it/s]global step 150000, trans_decision ep_re 341.33052911126964

{"global_step": 150000, "eval_re": [341.33052911126964, 341.33052911126964, 
341.33052911126964, 341.33052911126964, 341.33052911126964, 341.33052911126964, 
341.33052911126964, 341.33052911126964, 341.33052911126964, 341.33052911126964],
"eval_len": [65, 65, 65, 65, 65, 65, 65, 65, 65, 65]}

 16%|█▌        | 159997/1000000 [2:02:10<8:17:59, 28.11it/s]global step 160000, trans_decision ep_re 391.313653829113

{"global_step": 160000, "eval_re": [391.31365382911304, 391.31365382911304, 
391.31365382911304, 391.31365382911304, 391.31365382911304, 391.31365382911304, 
391.31365382911304, 391.31365382911304, 391.31365382911304, 391.31365382911304],
"eval_len": [73, 73, 73, 73, 73, 73, 73, 73, 73, 73]}

 17%|█▋        | 169999/1000000 [2:10:00<8:14:45, 27.96it/s]global step 170000, trans_decision ep_re 372.3478050405041

{"global_step": 170000, "eval_re": [372.3478050405041, 372.3478050405041, 
372.3478050405041, 372.3478050405041, 372.3478050405041, 372.3478050405041, 
372.3478050405041, 372.3478050405041, 372.3478050405041, 372.3478050405041], 
"eval_len": [70, 70, 70, 70, 70, 70, 70, 70, 70, 70]}

 18%|█▊        | 179999/1000000 [2:17:42<8:09:16, 27.93it/s]global step 180000, trans_decision ep_re 343.5794331888687

{"global_step": 180000, "eval_re": [343.5794331888687, 343.5794331888687, 
343.5794331888687, 343.5794331888687, 343.5794331888687, 343.5794331888687, 
343.5794331888687, 343.5794331888687, 343.5794331888687, 343.5794331888687], 
"eval_len": [66, 66, 66, 66, 66, 66, 66, 66, 66, 66]}

 19%|█▉        | 189998/1000000 [2:25:34<7:53:17, 28.52it/s]global step 190000, trans_decision ep_re 347.5087132001427

{"global_step": 190000, "eval_re": [347.5087132001427, 347.5087132001427, 
347.5087132001427, 347.5087132001427, 347.5087132001427, 347.5087132001427, 
347.5087132001427, 347.5087132001427, 347.5087132001427, 347.5087132001427], 
"eval_len": [67, 67, 67, 67, 67, 67, 67, 67, 67, 67]}

 20%|█▉        | 199999/1000000 [2:33:40<7:56:51, 27.96it/s]global step 200000, trans_decision ep_re 357.35797013672334

{"global_step": 200000, "eval_re": [357.35797013672334, 357.35797013672334, 
357.35797013672334, 357.35797013672334, 357.35797013672334, 357.35797013672334, 
357.35797013672334, 357.35797013672334, 357.35797013672334, 357.35797013672334],
"eval_len": [69, 69, 69, 69, 69, 69, 69, 69, 69, 69]}

 21%|██        | 209998/1000000 [2:41:30<7:42:05, 28.49it/s]global step 210000, trans_decision ep_re 358.0545500080034

{"global_step": 210000, "eval_re": [358.0545500080034, 358.0545500080034, 
358.0545500080034, 358.0545500080034, 358.0545500080034, 358.0545500080034, 
358.0545500080034, 358.0545500080034, 358.0545500080034, 358.0545500080034], 
"eval_len": [69, 69, 69, 69, 69, 69, 69, 69, 69, 69]}

 22%|██▏       | 219998/1000000 [2:49:11<7:33:24, 28.67it/s]global step 220000, trans_decision ep_re 354.67459017011606

{"global_step": 220000, "eval_re": [354.67459017011606, 354.67459017011606, 
354.67459017011606, 354.67459017011606, 354.67459017011606, 354.67459017011606, 
354.67459017011606, 354.67459017011606, 354.67459017011606, 354.67459017011606],
"eval_len": [69, 69, 69, 69, 69, 69, 69, 69, 69, 69]}

 23%|██▎       | 229998/1000000 [2:57:03<7:28:03, 28.64it/s]global step 230000, trans_decision ep_re 348.0001247799305

{"global_step": 230000, "eval_re": [348.5030585156423, 348.5030585156423, 
348.5030585156423, 348.5030585156423, 348.5030585156423, 348.5030585156423, 
343.47372115852403, 348.5030585156423, 348.5030585156423, 348.5030585156423], 
"eval_len": [68, 68, 68, 68, 68, 68, 67, 68, 68, 68]}

 24%|██▍       | 239999/1000000 [3:04:55<7:30:49, 28.10it/s]global step 240000, trans_decision ep_re 214.65591232380416

{"global_step": 240000, "eval_re": [214.65591232380416, 214.65591232380416, 
214.65591232380416, 214.65591232380416, 214.65591232380416, 214.65591232380416, 
214.65591232380416, 214.65591232380416, 214.65591232380416, 214.65591232380416],
"eval_len": [44, 44, 44, 44, 44, 44, 44, 44, 44, 44]}

 25%|██▍       | 249998/1000000 [3:12:46<7:21:55, 28.29it/s]global step 250000, trans_decision ep_re 204.07706449143225

{"global_step": 250000, "eval_re": [204.07706449143222, 204.07706449143222, 
204.07706449143222, 204.07706449143222, 204.07706449143222, 204.07706449143222, 
204.07706449143222, 204.07706449143222, 204.07706449143222, 204.07706449143222],
"eval_len": [42, 42, 42, 42, 42, 42, 42, 42, 42, 42]}

 26%|██▌       | 259998/1000000 [3:20:50<7:14:37, 28.38it/s]global step 260000, trans_decision ep_re 370.9549996219947

{"global_step": 260000, "eval_re": [370.9549996219947, 370.9549996219947, 
370.9549996219947, 370.9549996219947, 370.9549996219947, 370.9549996219947, 
370.9549996219947, 370.9549996219947, 370.9549996219947, 370.9549996219947], 
"eval_len": [71, 71, 71, 71, 71, 71, 71, 71, 71, 71]}

 27%|██▋       | 269999/1000000 [3:28:30<7:14:37, 27.99it/s]global step 270000, trans_decision ep_re 393.3583433159564

{"global_step": 270000, "eval_re": [393.35834331595635, 393.35834331595635, 
393.35834331595635, 393.35834331595635, 393.35834331595635, 393.35834331595635, 
393.35834331595635, 393.35834331595635, 393.35834331595635, 393.35834331595635],
"eval_len": [75, 75, 75, 75, 75, 75, 75, 75, 75, 75]}

 28%|██▊       | 279999/1000000 [3:36:22<7:09:01, 27.97it/s]global step 280000, trans_decision ep_re 389.4252370178282

{"global_step": 280000, "eval_re": [389.4252370178281, 389.4252370178281, 
389.4252370178281, 389.4252370178281, 389.4252370178281, 389.4252370178281, 
389.4252370178281, 389.4252370178281, 389.4252370178281, 389.4252370178281], 
"eval_len": [74, 74, 74, 74, 74, 74, 74, 74, 74, 74]}

 29%|██▉       | 289998/1000000 [3:44:15<6:54:43, 28.53it/s]global step 290000, trans_decision ep_re 204.59609406474573

{"global_step": 290000, "eval_re": [204.5960940647457, 204.5960940647457, 
204.5960940647457, 204.5960940647457, 204.5960940647457, 204.5960940647457, 
204.5960940647457, 204.5960940647457, 204.5960940647457, 204.5960940647457], 
"eval_len": [42, 42, 42, 42, 42, 42, 42, 42, 42, 42]}

 30%|██▉       | 299997/1000000 [3:52:06<6:55:53, 28.05it/s]global step 300000, trans_decision ep_re 221.00810845597135

{"global_step": 300000, "eval_re": [221.00810845597135, 221.00810845597135, 
221.00810845597135, 221.00810845597135, 221.00810845597135, 221.00810845597135, 
221.00810845597135, 221.00810845597135, 221.00810845597135, 221.00810845597135],
"eval_len": [45, 45, 45, 45, 45, 45, 45, 45, 45, 45]}

 31%|███       | 309997/1000000 [4:00:10<6:51:11, 27.97it/s]global step 310000, trans_decision ep_re 335.91082798924145

{"global_step": 310000, "eval_re": [335.9108279892414, 335.9108279892414, 
335.9108279892414, 335.9108279892414, 335.9108279892414, 335.9108279892414, 
335.9108279892414, 335.9108279892414, 335.9108279892414, 335.9108279892414], 
"eval_len": [65, 65, 65, 65, 65, 65, 65, 65, 65, 65]}

 32%|███▏      | 319999/1000000 [4:08:00<6:44:43, 28.00it/s]global step 320000, trans_decision ep_re 434.58007114872464

{"global_step": 320000, "eval_re": [434.5800711487246, 434.5800711487246, 
434.5800711487246, 434.5800711487246, 434.5800711487246, 434.5800711487246, 
434.5800711487246, 434.5800711487246, 434.5800711487246, 434.5800711487246], 
"eval_len": [82, 82, 82, 82, 82, 82, 82, 82, 82, 82]}

 33%|███▎      | 329999/1000000 [4:15:42<6:35:12, 28.25it/s]global step 330000, trans_decision ep_re 343.2904824994762

{"global_step": 330000, "eval_re": [343.2904824994762, 343.2904824994762, 
343.2904824994762, 343.2904824994762, 343.2904824994762, 343.2904824994762, 
343.2904824994762, 343.2904824994762, 343.2904824994762, 343.2904824994762], 
"eval_len": [66, 66, 66, 66, 66, 66, 66, 66, 66, 66]}

 34%|███▍      | 339997/1000000 [4:23:31<6:31:18, 28.11it/s]global step 340000, trans_decision ep_re 548.9211652887442

{"global_step": 340000, "eval_re": [548.9211652887442, 548.9211652887442, 
548.9211652887442, 548.9211652887442, 548.9211652887442, 548.9211652887442, 
548.9211652887442, 548.9211652887442, 548.9211652887442, 548.9211652887442], 
"eval_len": [99, 99, 99, 99, 99, 99, 99, 99, 99, 99]}

 35%|███▍      | 349999/1000000 [4:31:22<6:22:56, 28.29it/s]global step 350000, trans_decision ep_re 401.2883756340997

{"global_step": 350000, "eval_re": [401.2883756340996, 401.2883756340996, 
401.2883756340996, 401.2883756340996, 401.2883756340996, 401.2883756340996, 
401.2883756340996, 401.2883756340996, 401.2883756340996, 401.2883756340996], 
"eval_len": [76, 76, 76, 76, 76, 76, 76, 76, 76, 76]}

 36%|███▌      | 359997/1000000 [4:39:12<6:18:41, 28.17it/s]global step 360000, trans_decision ep_re 259.0062482188251

{"global_step": 360000, "eval_re": [259.0062482188251, 259.0062482188251, 
259.0062482188251, 259.0062482188251, 259.0062482188251, 259.0062482188251, 
259.0062482188251, 259.0062482188251, 259.0062482188251, 259.0062482188251], 
"eval_len": [52, 52, 52, 52, 52, 52, 52, 52, 52, 52]}

 37%|███▋      | 369997/1000000 [4:47:01<6:10:06, 28.37it/s]global step 370000, trans_decision ep_re 215.0243955187631

{"global_step": 370000, "eval_re": [215.0243955187631, 215.0243955187631, 
215.0243955187631, 215.0243955187631, 215.0243955187631, 215.0243955187631, 
215.0243955187631, 215.0243955187631, 215.0243955187631, 215.0243955187631], 
"eval_len": [44, 44, 44, 44, 44, 44, 44, 44, 44, 44]}

 38%|███▊      | 379997/1000000 [4:54:50<6:09:46, 27.95it/s]global step 380000, trans_decision ep_re 220.57974038296524

{"global_step": 380000, "eval_re": [220.57974038296527, 220.57974038296527, 
220.57974038296527, 220.57974038296527, 220.57974038296527, 220.57974038296527, 
220.57974038296527, 220.57974038296527, 220.57974038296527, 220.57974038296527],
"eval_len": [45, 45, 45, 45, 45, 45, 45, 45, 45, 45]}

 39%|███▉      | 389997/1000000 [5:02:41<5:59:57, 28.24it/s]global step 390000, trans_decision ep_re 242.09425742850334

{"global_step": 390000, "eval_re": [242.09425742850337, 242.09425742850337, 
242.09425742850337, 242.09425742850337, 242.09425742850337, 242.09425742850337, 
242.09425742850337, 242.09425742850337, 242.09425742850337, 242.09425742850337],
"eval_len": [49, 49, 49, 49, 49, 49, 49, 49, 49, 49]}

 40%|███▉      | 399998/1000000 [5:10:32<5:51:06, 28.48it/s]global step 400000, trans_decision ep_re 248.62728111705763

{"global_step": 400000, "eval_re": [248.6272811170577, 248.6272811170577, 
248.6272811170577, 248.6272811170577, 248.6272811170577, 248.6272811170577, 
248.6272811170577, 248.6272811170577, 248.6272811170577, 248.6272811170577], 
"eval_len": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}

 41%|████      | 409997/1000000 [5:18:22<5:46:57, 28.34it/s]global step 410000, trans_decision ep_re 254.1890557168941

{"global_step": 410000, "eval_re": [254.18905571689413, 254.18905571689413, 
254.18905571689413, 254.18905571689413, 254.18905571689413, 254.18905571689413, 
254.18905571689413, 254.18905571689413, 254.18905571689413, 254.18905571689413],
"eval_len": [51, 51, 51, 51, 51, 51, 51, 51, 51, 51]}

 42%|████▏     | 419998/1000000 [5:26:11<5:36:20, 28.74it/s]global step 420000, trans_decision ep_re 230.750008347691

{"global_step": 420000, "eval_re": [230.75000834769097, 230.75000834769097, 
230.75000834769097, 230.75000834769097, 230.75000834769097, 230.75000834769097, 
230.75000834769097, 230.75000834769097, 230.75000834769097, 230.75000834769097],
"eval_len": [47, 47, 47, 47, 47, 47, 47, 47, 47, 47]}

 43%|████▎     | 429997/1000000 [5:34:10<5:35:17, 28.33it/s]global step 430000, trans_decision ep_re 354.44954057866596

{"global_step": 430000, "eval_re": [354.4495405786659, 354.4495405786659, 
354.4495405786659, 354.4495405786659, 354.4495405786659, 354.4495405786659, 
354.4495405786659, 354.4495405786659, 354.4495405786659, 354.4495405786659], 
"eval_len": [68, 68, 68, 68, 68, 68, 68, 68, 68, 68]}

 44%|████▍     | 439998/1000000 [5:42:00<5:27:42, 28.48it/s]global step 440000, trans_decision ep_re 257.6256078942291

{"global_step": 440000, "eval_re": [257.62560789422906, 257.62560789422906, 
257.62560789422906, 257.62560789422906, 257.62560789422906, 257.62560789422906, 
257.62560789422906, 257.62560789422906, 257.62560789422906, 257.62560789422906],
"eval_len": [52, 52, 52, 52, 52, 52, 52, 52, 52, 52]}

 45%|████▍     | 449999/1000000 [5:49:50<5:25:19, 28.18it/s]global step 450000, trans_decision ep_re 673.5202485835218

{"global_step": 450000, "eval_re": [673.5202485835218, 673.5202485835218, 
673.5202485835218, 673.5202485835218, 673.5202485835218, 673.5202485835218, 
673.5202485835218, 673.5202485835218, 673.5202485835218, 673.5202485835218], 
"eval_len": [125, 125, 125, 125, 125, 125, 125, 125, 125, 125]}

 46%|████▌     | 459998/1000000 [5:57:40<5:13:32, 28.70it/s]global step 460000, trans_decision ep_re 246.54819687964286

{"global_step": 460000, "eval_re": [246.54819687964283, 246.54819687964283, 
246.54819687964283, 246.54819687964283, 246.54819687964283, 246.54819687964283, 
246.54819687964283, 246.54819687964283, 246.54819687964283, 246.54819687964283],
"eval_len": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}

 47%|████▋     | 469997/1000000 [6:05:30<5:10:01, 28.49it/s]global step 470000, trans_decision ep_re 237.7182345940857

{"global_step": 470000, "eval_re": [237.71823459408571, 237.71823459408571, 
237.71823459408571, 237.71823459408571, 237.71823459408571, 237.71823459408571, 
237.71823459408571, 237.71823459408571, 237.71823459408571, 237.71823459408571],
"eval_len": [48, 48, 48, 48, 48, 48, 48, 48, 48, 48]}

 48%|████▊     | 479997/1000000 [6:13:20<5:09:00, 28.05it/s]global step 480000, trans_decision ep_re 237.13376614255645

{"global_step": 480000, "eval_re": [237.13376614255645, 237.13376614255645, 
237.13376614255645, 237.13376614255645, 237.13376614255645, 237.13376614255645, 
237.13376614255645, 237.13376614255645, 237.13376614255645, 237.13376614255645],
"eval_len": [48, 48, 48, 48, 48, 48, 48, 48, 48, 48]}

 49%|████▉     | 489997/1000000 [6:21:10<5:03:06, 28.04it/s]global step 490000, trans_decision ep_re 236.05749070675793

{"global_step": 490000, "eval_re": [236.0574907067579, 236.0574907067579, 
236.0574907067579, 236.0574907067579, 236.0574907067579, 236.0574907067579, 
236.0574907067579, 236.0574907067579, 236.0574907067579, 236.0574907067579], 
"eval_len": [48, 48, 48, 48, 48, 48, 48, 48, 48, 48]}

 50%|████▉     | 499997/1000000 [6:29:00<4:55:34, 28.19it/s]global step 500000, trans_decision ep_re 231.57438305159957

{"global_step": 500000, "eval_re": [231.5743830515996, 231.5743830515996, 
231.5743830515996, 231.5743830515996, 231.5743830515996, 231.5743830515996, 
231.5743830515996, 231.5743830515996, 231.5743830515996, 231.5743830515996], 
"eval_len": [47, 47, 47, 47, 47, 47, 47, 47, 47, 47]}

 51%|█████     | 509999/1000000 [6:36:50<4:45:56, 28.56it/s]global step 510000, trans_decision ep_re 529.1142334384443

{"global_step": 510000, "eval_re": [529.1142334384444, 529.1142334384444, 
529.1142334384444, 529.1142334384444, 529.1142334384444, 529.1142334384444, 
529.1142334384444, 529.1142334384444, 529.1142334384444, 529.1142334384444], 
"eval_len": [106, 106, 106, 106, 106, 106, 106, 106, 106, 106]}

 52%|█████▏    | 519997/1000000 [6:44:30<4:43:51, 28.18it/s]global step 520000, trans_decision ep_re 243.07520184289493

{"global_step": 520000, "eval_re": [243.07520184289493, 243.07520184289493, 
243.07520184289493, 243.07520184289493, 243.07520184289493, 243.07520184289493, 
243.07520184289493, 243.07520184289493, 243.07520184289493, 243.07520184289493],
"eval_len": [49, 49, 49, 49, 49, 49, 49, 49, 49, 49]}

 53%|█████▎    | 529997/1000000 [6:52:21<4:39:07, 28.06it/s]global step 530000, trans_decision ep_re 232.5913946013324

{"global_step": 530000, "eval_re": [232.59139460133244, 232.59139460133244, 
232.59139460133244, 232.59139460133244, 232.59139460133244, 232.59139460133244, 
232.59139460133244, 232.59139460133244, 232.59139460133244, 232.59139460133244],
"eval_len": [47, 47, 47, 47, 47, 47, 47, 47, 47, 47]}

 54%|█████▍    | 539999/1000000 [7:00:11<4:33:25, 28.04it/s]global step 540000, trans_decision ep_re 194.87677239184717

{"global_step": 540000, "eval_re": [194.87677239184714, 194.87677239184714, 
194.87677239184714, 194.87677239184714, 194.87677239184714, 194.87677239184714, 
194.87677239184714, 194.87677239184714, 194.87677239184714, 194.87677239184714],
"eval_len": [40, 40, 40, 40, 40, 40, 40, 40, 40, 40]}

 55%|█████▍    | 549997/1000000 [7:08:10<4:27:25, 28.05it/s]global step 550000, trans_decision ep_re 189.6168781812941

{"global_step": 550000, "eval_re": [189.6168781812941, 189.6168781812941, 
189.6168781812941, 189.6168781812941, 189.6168781812941, 189.6168781812941, 
189.6168781812941, 189.6168781812941, 189.6168781812941, 189.6168781812941], 
"eval_len": [39, 39, 39, 39, 39, 39, 39, 39, 39, 39]}

 56%|█████▌    | 559998/1000000 [7:16:00<4:15:15, 28.73it/s]global step 560000, trans_decision ep_re 232.67060730585362

{"global_step": 560000, "eval_re": [232.67060730585362, 232.67060730585362, 
232.67060730585362, 232.67060730585362, 232.67060730585362, 232.67060730585362, 
232.67060730585362, 232.67060730585362, 232.67060730585362, 232.67060730585362],
"eval_len": [47, 47, 47, 47, 47, 47, 47, 47, 47, 47]}

 57%|█████▋    | 569997/1000000 [7:23:50<4:13:49, 28.24it/s]global step 570000, trans_decision ep_re 220.45684012567418

{"global_step": 570000, "eval_re": [220.45684012567418, 220.45684012567418, 
220.45684012567418, 220.45684012567418, 220.45684012567418, 220.45684012567418, 
220.45684012567418, 220.45684012567418, 220.45684012567418, 220.45684012567418],
"eval_len": [45, 45, 45, 45, 45, 45, 45, 45, 45, 45]}

 58%|█████▊    | 579997/1000000 [7:31:27<4:08:03, 28.22it/s]global step 580000, trans_decision ep_re 205.5616905812386

{"global_step": 580000, "eval_re": [205.5616905812386, 205.5616905812386, 
205.5616905812386, 205.5616905812386, 205.5616905812386, 205.5616905812386, 
205.5616905812386, 205.5616905812386, 205.5616905812386, 205.5616905812386], 
"eval_len": [42, 42, 42, 42, 42, 42, 42, 42, 42, 42]}

 59%|█████▉    | 589999/1000000 [7:39:16<4:00:51, 28.37it/s]global step 590000, trans_decision ep_re 195.8196473416927

{"global_step": 590000, "eval_re": [195.81964734169273, 195.81964734169273, 
195.81964734169273, 195.81964734169273, 195.81964734169273, 195.81964734169273, 
195.81964734169273, 195.81964734169273, 195.81964734169273, 195.81964734169273],
"eval_len": [40, 40, 40, 40, 40, 40, 40, 40, 40, 40]}

 60%|█████▉    | 599998/1000000 [7:47:05<3:51:49, 28.76it/s]global step 600000, trans_decision ep_re 205.49259283528676

{"global_step": 600000, "eval_re": [205.49259283528679, 205.49259283528679, 
205.49259283528679, 205.49259283528679, 205.49259283528679, 205.49259283528679, 
205.49259283528679, 205.49259283528679, 205.49259283528679, 205.49259283528679],
"eval_len": [42, 42, 42, 42, 42, 42, 42, 42, 42, 42]}

 61%|██████    | 609999/1000000 [7:54:54<3:50:08, 28.24it/s]global step 610000, trans_decision ep_re 235.80892144619378

{"global_step": 610000, "eval_re": [234.23119025903287, 234.23119025903287, 
234.23119025903287, 250.00850213064226, 234.23119025903287, 234.23119025903287, 
234.23119025903287, 234.23119025903287, 234.23119025903287, 234.23119025903287],
"eval_len": [47, 47, 47, 50, 47, 47, 47, 47, 47, 47]}

 62%|██████▏   | 619997/1000000 [8:02:44<3:46:39, 27.94it/s]global step 620000, trans_decision ep_re 189.75098787886802

{"global_step": 620000, "eval_re": [189.75098787886802, 189.75098787886802, 
189.75098787886802, 189.75098787886802, 189.75098787886802, 189.75098787886802, 
189.75098787886802, 189.75098787886802, 189.75098787886802, 189.75098787886802],
"eval_len": [39, 39, 39, 39, 39, 39, 39, 39, 39, 39]}

 63%|██████▎   | 629999/1000000 [8:10:33<3:39:27, 28.10it/s]global step 630000, trans_decision ep_re 212.0378602113206

{"global_step": 630000, "eval_re": [212.0378602113206, 212.0378602113206, 
212.0378602113206, 212.0378602113206, 212.0378602113206, 212.0378602113206, 
212.0378602113206, 212.0378602113206, 212.0378602113206, 212.0378602113206], 
"eval_len": [43, 43, 43, 43, 43, 43, 43, 43, 43, 43]}

 64%|██████▍   | 639998/1000000 [8:18:22<3:30:42, 28.48it/s]global step 640000, trans_decision ep_re 201.17638510642556

{"global_step": 640000, "eval_re": [201.17638510642553, 201.17638510642553, 
201.17638510642553, 201.17638510642553, 201.17638510642553, 201.17638510642553, 
201.17638510642553, 201.17638510642553, 201.17638510642553, 201.17638510642553],
"eval_len": [41, 41, 41, 41, 41, 41, 41, 41, 41, 41]}

 65%|██████▍   | 649999/1000000 [8:26:12<3:28:53, 27.92it/s]global step 650000, trans_decision ep_re 244.1115461697529

{"global_step": 650000, "eval_re": [244.11154616975293, 244.11154616975293, 
244.11154616975293, 244.11154616975293, 244.11154616975293, 244.11154616975293, 
244.11154616975293, 244.11154616975293, 244.11154616975293, 244.11154616975293],
"eval_len": [49, 49, 49, 49, 49, 49, 49, 49, 49, 49]}

 66%|██████▌   | 659997/1000000 [8:34:01<3:22:28, 27.99it/s]global step 660000, trans_decision ep_re 205.1464454879877

{"global_step": 660000, "eval_re": [205.14644548798768, 205.14644548798768, 
205.14644548798768, 205.14644548798768, 205.14644548798768, 205.14644548798768, 
205.14644548798768, 205.14644548798768, 205.14644548798768, 205.14644548798768],
"eval_len": [42, 42, 42, 42, 42, 42, 42, 42, 42, 42]}

 67%|██████▋   | 669997/1000000 [8:41:50<3:15:03, 28.20it/s]global step 670000, trans_decision ep_re 238.1523250189977

{"global_step": 670000, "eval_re": [238.1523250189977, 238.1523250189977, 
238.1523250189977, 238.1523250189977, 238.1523250189977, 238.1523250189977, 
238.1523250189977, 238.1523250189977, 238.1523250189977, 238.1523250189977], 
"eval_len": [48, 48, 48, 48, 48, 48, 48, 48, 48, 48]}

 68%|██████▊   | 679999/1000000 [8:49:50<3:10:03, 28.06it/s]global step 680000, trans_decision ep_re 215.71828519471472

{"global_step": 680000, "eval_re": [215.71828519471472, 215.71828519471472, 
215.71828519471472, 215.71828519471472, 215.71828519471472, 215.71828519471472, 
215.71828519471472, 215.71828519471472, 215.71828519471472, 215.71828519471472],
"eval_len": [44, 44, 44, 44, 44, 44, 44, 44, 44, 44]}

 69%|██████▉   | 689997/1000000 [8:57:40<3:04:13, 28.04it/s]global step 690000, trans_decision ep_re 257.8317859961699

{"global_step": 690000, "eval_re": [257.8317859961699, 257.8317859961699, 
257.8317859961699, 257.8317859961699, 257.8317859961699, 257.8317859961699, 
257.8317859961699, 257.8317859961699, 257.8317859961699, 257.8317859961699], 
"eval_len": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}

 70%|██████▉   | 699999/1000000 [9:05:30<2:57:36, 28.15it/s]global step 700000, trans_decision ep_re 185.9859524398825

{"global_step": 700000, "eval_re": [185.9859524398825, 185.9859524398825, 
185.9859524398825, 185.9859524398825, 185.9859524398825, 185.9859524398825, 
185.9859524398825, 185.9859524398825, 185.9859524398825, 185.9859524398825], 
"eval_len": [38, 38, 38, 38, 38, 38, 38, 38, 38, 38]}

 71%|███████   | 709998/1000000 [9:13:20<2:48:56, 28.61it/s]global step 710000, trans_decision ep_re 219.08263270605607

{"global_step": 710000, "eval_re": [219.08263270605607, 219.08263270605607, 
219.08263270605607, 219.08263270605607, 219.08263270605607, 219.08263270605607, 
219.08263270605607, 219.08263270605607, 219.08263270605607, 219.08263270605607],
"eval_len": [43, 43, 43, 43, 43, 43, 43, 43, 43, 43]}

 72%|███████▏  | 719998/1000000 [9:20:56<2:43:06, 28.61it/s]global step 720000, trans_decision ep_re 227.5453388287223

{"global_step": 720000, "eval_re": [227.54533882872227, 227.54533882872227, 
227.54533882872227, 227.54533882872227, 227.54533882872227, 227.54533882872227, 
227.54533882872227, 227.54533882872227, 227.54533882872227, 227.54533882872227],
"eval_len": [46, 46, 46, 46, 46, 46, 46, 46, 46, 46]}

 73%|███████▎  | 729998/1000000 [9:28:45<2:37:05, 28.65it/s]global step 730000, trans_decision ep_re 222.0947699576419

{"global_step": 730000, "eval_re": [222.09156484580575, 222.09156484580575, 
222.09156484580575, 222.09156484580575, 222.09156484580575, 222.09156484580575, 
222.12361596416736, 222.09156484580575, 222.09156484580575, 222.09156484580575],
"eval_len": [45, 45, 45, 45, 45, 45, 45, 45, 45, 45]}

 74%|███████▍  | 739999/1000000 [9:36:33<2:32:45, 28.37it/s]global step 740000, trans_decision ep_re 249.3929930531081

{"global_step": 740000, "eval_re": [249.39299305310809, 249.39299305310809, 
249.39299305310809, 249.39299305310809, 249.39299305310809, 249.39299305310809, 
249.39299305310809, 249.39299305310809, 249.39299305310809, 249.39299305310809],
"eval_len": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}

 75%|███████▍  | 749997/1000000 [9:44:21<2:28:13, 28.11it/s]global step 750000, trans_decision ep_re 376.0091034148744

{"global_step": 750000, "eval_re": [376.00910341487435, 376.00910341487435, 
376.00910341487435, 376.00910341487435, 376.00910341487435, 376.00910341487435, 
376.00910341487435, 376.00910341487435, 376.00910341487435, 376.00910341487435],
"eval_len": [71, 71, 71, 71, 71, 71, 71, 71, 71, 71]}

 76%|███████▌  | 759997/1000000 [9:52:10<2:22:04, 28.15it/s]global step 760000, trans_decision ep_re 262.5604321994724

{"global_step": 760000, "eval_re": [262.5604321994724, 262.5604321994724, 
262.5604321994724, 262.5604321994724, 262.5604321994724, 262.5604321994724, 
262.5604321994724, 262.5604321994724, 262.5604321994724, 262.5604321994724], 
"eval_len": [53, 53, 53, 53, 53, 53, 53, 53, 53, 53]}

 77%|███████▋  | 769998/1000000 [10:00:10<2:12:52, 28.85it/s]global step 770000, trans_decision ep_re 226.90205521382512

{"global_step": 770000, "eval_re": [226.90205521382512, 226.90205521382512, 
226.90205521382512, 226.90205521382512, 226.90205521382512, 226.90205521382512, 
226.90205521382512, 226.90205521382512, 226.90205521382512, 226.90205521382512],
"eval_len": [46, 46, 46, 46, 46, 46, 46, 46, 46, 46]}

 78%|███████▊  | 779998/1000000 [10:08:00<2:07:26, 28.77it/s]global step 780000, trans_decision ep_re 247.42382448269237

{"global_step": 780000, "eval_re": [247.42382448269237, 247.42382448269237, 
247.42382448269237, 247.42382448269237, 247.42382448269237, 247.42382448269237, 
247.42382448269237, 247.42382448269237, 247.42382448269237, 247.42382448269237],
"eval_len": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}

 79%|███████▉  | 789997/1000000 [10:15:35<2:03:50, 28.26it/s]global step 790000, trans_decision ep_re 230.04682006220938

{"global_step": 790000, "eval_re": [230.04682006220935, 230.04682006220935, 
230.04682006220935, 230.04682006220935, 230.04682006220935, 230.04682006220935, 
230.04682006220935, 230.04682006220935, 230.04682006220935, 230.04682006220935],
"eval_len": [47, 47, 47, 47, 47, 47, 47, 47, 47, 47]}

 80%|███████▉  | 799998/1000000 [10:23:24<1:56:52, 28.52it/s]global step 800000, trans_decision ep_re 234.73148544656266

{"global_step": 800000, "eval_re": [234.73148544656266, 234.73148544656266, 
234.73148544656266, 234.73148544656266, 234.73148544656266, 234.73148544656266, 
234.73148544656266, 234.73148544656266, 234.73148544656266, 234.73148544656266],
"eval_len": [48, 48, 48, 48, 48, 48, 48, 48, 48, 48]}

 81%|████████  | 809997/1000000 [10:31:12<1:51:06, 28.50it/s]global step 810000, trans_decision ep_re 244.40232310490632

{"global_step": 810000, "eval_re": [244.40232310490634, 244.40232310490634, 
244.40232310490634, 244.40232310490634, 244.40232310490634, 244.40232310490634, 
244.40232310490634, 244.40232310490634, 244.40232310490634, 244.40232310490634],
"eval_len": [49, 49, 49, 49, 49, 49, 49, 49, 49, 49]}

 82%|████████▏ | 819997/1000000 [10:39:00<1:45:52, 28.34it/s]global step 820000, trans_decision ep_re 213.88862329161356

{"global_step": 820000, "eval_re": [213.88862329161358, 213.88862329161358, 
213.88862329161358, 213.88862329161358, 213.88862329161358, 213.88862329161358, 
213.88862329161358, 213.88862329161358, 213.88862329161358, 213.88862329161358],
"eval_len": [44, 44, 44, 44, 44, 44, 44, 44, 44, 44]}

 83%|████████▎ | 829998/1000000 [10:47:00<1:38:39, 28.72it/s]global step 830000, trans_decision ep_re 212.48792003428872

{"global_step": 830000, "eval_re": [212.48792003428872, 212.48792003428872, 
212.48792003428872, 212.48792003428872, 212.48792003428872, 212.48792003428872, 
212.48792003428872, 212.48792003428872, 212.48792003428872, 212.48792003428872],
"eval_len": [44, 44, 44, 44, 44, 44, 44, 44, 44, 44]}

 84%|████████▍ | 839997/1000000 [10:54:36<1:34:44, 28.15it/s]global step 840000, trans_decision ep_re 218.6417512016895

{"global_step": 840000, "eval_re": [218.6417512016895, 218.6417512016895, 
218.6417512016895, 218.6417512016895, 218.6417512016895, 218.6417512016895, 
218.6417512016895, 218.6417512016895, 218.6417512016895, 218.6417512016895], 
"eval_len": [45, 45, 45, 45, 45, 45, 45, 45, 45, 45]}

 85%|████████▍ | 849999/1000000 [11:02:25<1:28:11, 28.35it/s]global step 850000, trans_decision ep_re 214.90559844612534

{"global_step": 850000, "eval_re": [214.90559844612537, 214.90559844612537, 
214.90559844612537, 214.90559844612537, 214.90559844612537, 214.90559844612537, 
214.90559844612537, 214.90559844612537, 214.90559844612537, 214.90559844612537],
"eval_len": [44, 44, 44, 44, 44, 44, 44, 44, 44, 44]}

 86%|████████▌ | 859998/1000000 [11:10:13<1:21:41, 28.56it/s]global step 860000, trans_decision ep_re 237.540672360703

{"global_step": 860000, "eval_re": [237.54067236070298, 237.54067236070298, 
237.54067236070298, 237.54067236070298, 237.54067236070298, 237.54067236070298, 
237.54067236070298, 237.54067236070298, 237.54067236070298, 237.54067236070298],
"eval_len": [48, 48, 48, 48, 48, 48, 48, 48, 48, 48]}

 87%|████████▋ | 869997/1000000 [11:18:01<1:16:20, 28.38it/s]global step 870000, trans_decision ep_re 642.7805486282396

{"global_step": 870000, "eval_re": [642.7805486282396, 642.7805486282396, 
642.7805486282396, 642.7805486282396, 642.7805486282396, 642.7805486282396, 
642.7805486282396, 642.7805486282396, 642.7805486282396, 642.7805486282396], 
"eval_len": [116, 116, 116, 116, 116, 116, 116, 116, 116, 116]}

 88%|████████▊ | 879998/1000000 [11:25:53<1:10:03, 28.55it/s]global step 880000, trans_decision ep_re 204.87502763169877

{"global_step": 880000, "eval_re": [204.8750276316988, 204.8750276316988, 
204.8750276316988, 204.8750276316988, 204.8750276316988, 204.8750276316988, 
204.8750276316988, 204.8750276316988, 204.8750276316988, 204.8750276316988], 
"eval_len": [42, 42, 42, 42, 42, 42, 42, 42, 42, 42]}

 89%|████████▉ | 889999/1000000 [11:33:42<1:05:11, 28.12it/s]global step 890000, trans_decision ep_re 232.6226436211926

{"global_step": 890000, "eval_re": [232.62264362119254, 232.62264362119254, 
232.62264362119254, 232.62264362119254, 232.62264362119254, 232.62264362119254, 
232.62264362119254, 232.62264362119254, 232.62264362119254, 232.62264362119254],
"eval_len": [47, 47, 47, 47, 47, 47, 47, 47, 47, 47]}

 90%|████████▉ | 899999/1000000 [11:41:32<58:57, 28.27it/s]global step 900000, trans_decision ep_re 267.4968523967511

{"global_step": 900000, "eval_re": [267.49685239675114, 267.49685239675114, 
267.49685239675114, 267.49685239675114, 267.49685239675114, 267.49685239675114, 
267.49685239675114, 267.49685239675114, 267.49685239675114, 267.49685239675114],
"eval_len": [53, 53, 53, 53, 53, 53, 53, 53, 53, 53]}

 91%|█████████ | 909997/1000000 [11:49:22<53:34, 28.00it/s]global step 910000, trans_decision ep_re 254.983570879987

{"global_step": 910000, "eval_re": [254.983570879987, 254.983570879987, 
254.983570879987, 254.983570879987, 254.983570879987, 254.983570879987, 
254.983570879987, 254.983570879987, 254.983570879987, 254.983570879987], 
"eval_len": [51, 51, 51, 51, 51, 51, 51, 51, 51, 51]}

 92%|█████████▏| 919998/1000000 [11:57:16<46:52, 28.44it/s]global step 920000, trans_decision ep_re 226.8922492058651

{"global_step": 920000, "eval_re": [226.8922492058651, 226.8922492058651, 
226.8922492058651, 226.8922492058651, 226.8922492058651, 226.8922492058651, 
226.8922492058651, 226.8922492058651, 226.8922492058651, 226.8922492058651], 
"eval_len": [46, 46, 46, 46, 46, 46, 46, 46, 46, 46]}

 93%|█████████▎| 929997/1000000 [12:05:11<41:53, 27.85it/s]global step 930000, trans_decision ep_re 208.58720752489094

{"global_step": 930000, "eval_re": [208.58720752489094, 208.58720752489094, 
208.58720752489094, 208.58720752489094, 208.58720752489094, 208.58720752489094, 
208.58720752489094, 208.58720752489094, 208.58720752489094, 208.58720752489094],
"eval_len": [43, 43, 43, 43, 43, 43, 43, 43, 43, 43]}

 94%|█████████▍| 939997/1000000 [12:13:20<35:43, 28.00it/s]global step 940000, trans_decision ep_re 762.7520032972377

{"global_step": 940000, "eval_re": [762.7520032972377, 762.7520032972377, 
762.7520032972377, 762.7520032972377, 762.7520032972377, 762.7520032972377, 
762.7520032972377, 762.7520032972377, 762.7520032972377, 762.7520032972377], 
"eval_len": [151, 151, 151, 151, 151, 151, 151, 151, 151, 151]}

 95%|█████████▍| 949998/1000000 [12:21:05<29:20, 28.40it/s]global step 950000, trans_decision ep_re 210.3527568707631

{"global_step": 950000, "eval_re": [210.35275687076307, 210.35275687076307, 
210.35275687076307, 210.35275687076307, 210.35275687076307, 210.35275687076307, 
210.35275687076307, 210.35275687076307, 210.35275687076307, 210.35275687076307],
"eval_len": [43, 43, 43, 43, 43, 43, 43, 43, 43, 43]}

 96%|█████████▌| 959998/1000000 [12:29:00<23:31, 28.34it/s]global step 960000, trans_decision ep_re 208.36345653324642

{"global_step": 960000, "eval_re": [208.36345653324642, 208.36345653324642, 
208.36345653324642, 208.36345653324642, 208.36345653324642, 208.36345653324642, 
208.36345653324642, 208.36345653324642, 208.36345653324642, 208.36345653324642],
"eval_len": [43, 43, 43, 43, 43, 43, 43, 43, 43, 43]}

 97%|█████████▋| 969998/1000000 [12:36:55<17:37, 28.36it/s]global step 970000, trans_decision ep_re 244.18355485757488

{"global_step": 970000, "eval_re": [244.18355485757485, 244.18355485757485, 
244.18355485757485, 244.18355485757485, 244.18355485757485, 244.18355485757485, 
244.18355485757485, 244.18355485757485, 244.18355485757485, 244.18355485757485],
"eval_len": [49, 49, 49, 49, 49, 49, 49, 49, 49, 49]}

 98%|█████████▊| 979998/1000000 [12:45:00<11:44, 28.40it/s]global step 980000, trans_decision ep_re 255.29817269764857

{"global_step": 980000, "eval_re": [255.29817269764857, 255.29817269764857, 
255.29817269764857, 255.29817269764857, 255.29817269764857, 255.29817269764857, 
255.29817269764857, 255.29817269764857, 255.29817269764857, 255.29817269764857],
"eval_len": [51, 51, 51, 51, 51, 51, 51, 51, 51, 51]}

 99%|█████████▉| 989997/1000000 [12:52:45<05:58, 27.93it/s]global step 990000, trans_decision ep_re 255.72625456978275

{"global_step": 990000, "eval_re": [255.72625456978278, 255.72625456978278, 
255.72625456978278, 255.72625456978278, 255.72625456978278, 255.72625456978278, 
255.72625456978278, 255.72625456978278, 255.72625456978278, 255.72625456978278],
"eval_len": [51, 51, 51, 51, 51, 51, 51, 51, 51, 51]}

100%|█████████▉| 999999/1000000 [13:00:41<00:00, 27.88it/s]global step 1000000, trans_decision ep_re 238.02047004557417

{"global_step": 1000000, "eval_re": [238.02047004557417, 238.02047004557417, 
238.02047004557417, 238.02047004557417, 238.02047004557417, 238.02047004557417, 
238.02047004557417, 238.02047004557417, 238.02047004557417, 238.02047004557417],
"eval_len": [48, 48, 48, 48, 48, 48, 48, 48, 48, 48]}

100%|██████████| 1000000/1000000 [13:00:54<00:00, 21.34it/s]
