
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.0
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9997/1000000 [03:40<8:29:45, 32.37it/s]global step 10000, trans_decision ep_re -202.8974381286953

{"global_step": 10000, "eval_re": [-202.8974381286953, -202.8974381286953, 
-202.8974381286953, -202.8974381286953, -202.8974381286953, -202.8974381286953, 
-202.8974381286953, -202.8974381286953, -202.8974381286953, -202.8974381286953],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19997/1000000 [10:50<8:31:24, 31.94it/s]global step 20000, trans_decision ep_re 442.9617345293615

{"global_step": 20000, "eval_re": [442.9617345293614, 442.9617345293614, 
442.9617345293614, 442.9617345293614, 442.9617345293614, 442.9617345293614, 
442.9617345293614, 442.9617345293614, 442.9617345293614, 442.9617345293614], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29997/1000000 [18:00<8:19:07, 32.39it/s]global step 30000, trans_decision ep_re 633.2815084904244

{"global_step": 30000, "eval_re": [639.0480534494169, 573.6998241193215, 
639.0480534494169, 639.0480534494169, 639.0480534494169, 639.0480534494169, 
639.0480534494169, 639.0480534494169, 639.0480534494169, 646.7308331895864], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39997/1000000 [25:10<8:09:39, 32.68it/s]global step 40000, trans_decision ep_re 1104.1705721185979

{"global_step": 40000, "eval_re": [1104.170572118598, 1104.170572118598, 
1104.170572118598, 1104.170572118598, 1104.170572118598, 1104.170572118598, 
1104.170572118598, 1104.170572118598, 1104.170572118598, 1104.170572118598], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49997/1000000 [32:30<8:14:20, 32.03it/s]global step 50000, trans_decision ep_re 788.3528488311007

{"global_step": 50000, "eval_re": [788.3528488311008, 788.3528488311008, 
788.3528488311008, 788.3528488311008, 788.3528488311008, 788.3528488311008, 
788.3528488311008, 788.3528488311008, 788.3528488311008, 788.3528488311008], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59997/1000000 [39:40<8:05:04, 32.30it/s]global step 60000, trans_decision ep_re 1078.9795106005313

{"global_step": 60000, "eval_re": [1078.9795106005313, 1078.9795106005313, 
1078.9795106005313, 1078.9795106005313, 1078.9795106005313, 1078.9795106005313, 
1078.9795106005313, 1078.9795106005313, 1078.9795106005313, 1078.9795106005313],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69997/1000000 [46:50<7:55:22, 32.61it/s]global step 70000, trans_decision ep_re 1321.8767929027363

{"global_step": 70000, "eval_re": [1321.8767929027363, 1321.8767929027363, 
1321.8767929027363, 1321.8767929027363, 1321.8767929027363, 1321.8767929027363, 
1321.8767929027363, 1321.8767929027363, 1321.8767929027363, 1321.8767929027363],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79997/1000000 [54:00<7:54:45, 32.30it/s]global step 80000, trans_decision ep_re 1087.304502829194

{"global_step": 80000, "eval_re": [1087.304502829194, 1087.304502829194, 
1087.304502829194, 1087.304502829194, 1087.304502829194, 1087.304502829194, 
1087.304502829194, 1087.304502829194, 1087.304502829194, 1087.304502829194], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89997/1000000 [1:01:20<7:47:59, 32.41it/s]global step 90000, trans_decision ep_re 1992.403160465424

{"global_step": 90000, "eval_re": [1992.4031604654242, 1992.4031604654242, 
1992.4031604654242, 1992.4031604654242, 1992.4031604654242, 1992.4031604654242, 
1992.4031604654242, 1992.4031604654242, 1992.4031604654242, 1992.4031604654242],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99997/1000000 [1:08:30<7:45:55, 32.19it/s]global step 100000, trans_decision ep_re 2516.2448326986823

{"global_step": 100000, "eval_re": [2516.244832698683, 2516.244832698683, 
2516.244832698683, 2516.244832698683, 2516.244832698683, 2516.244832698683, 
2516.244832698683, 2516.244832698683, 2516.244832698683, 2516.244832698683], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109997/1000000 [1:15:40<7:45:38, 31.86it/s]global step 110000, trans_decision ep_re 1722.5736332757035

{"global_step": 110000, "eval_re": [1722.5736332757035, 1722.5736332757035, 
1722.5736332757035, 1722.5736332757035, 1722.5736332757035, 1722.5736332757035, 
1722.5736332757035, 1722.5736332757035, 1722.5736332757035, 1722.5736332757035],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119997/1000000 [1:22:50<7:33:14, 32.36it/s]global step 120000, trans_decision ep_re 1154.564683254382

{"global_step": 120000, "eval_re": [1154.5646832543819, 1154.5646832543819, 
1154.5646832543819, 1154.5646832543819, 1154.5646832543819, 1154.5646832543819, 
1154.5646832543819, 1154.5646832543819, 1154.5646832543819, 1154.5646832543819],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129997/1000000 [1:30:00<7:27:07, 32.43it/s]global step 130000, trans_decision ep_re 1396.444331551536

{"global_step": 130000, "eval_re": [1396.444331551536, 1396.444331551536, 
1396.444331551536, 1396.444331551536, 1396.444331551536, 1396.444331551536, 
1396.444331551536, 1396.444331551536, 1396.444331551536, 1396.444331551536], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139997/1000000 [1:37:10<7:23:23, 32.33it/s]global step 140000, trans_decision ep_re 1576.624322848234

{"global_step": 140000, "eval_re": [1576.624322848234, 1576.624322848234, 
1576.624322848234, 1576.624322848234, 1576.624322848234, 1576.624322848234, 
1576.624322848234, 1576.624322848234, 1576.624322848234, 1576.624322848234], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149997/1000000 [1:44:20<7:15:24, 32.54it/s]global step 150000, trans_decision ep_re 1219.3755663083198

{"global_step": 150000, "eval_re": [1218.039664666472, 1218.039664666472, 
1198.3018779164358, 1218.039664666472, 1218.039664666472, 1218.039664666472, 
1218.039664666472, 1251.1364678349858, 1218.039664666472, 1218.039664666472], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159997/1000000 [1:51:40<7:13:37, 32.29it/s]global step 160000, trans_decision ep_re 1525.2184147912817

{"global_step": 160000, "eval_re": [1525.2184147912817, 1525.2184147912817, 
1525.2184147912817, 1525.2184147912817, 1525.2184147912817, 1525.2184147912817, 
1525.2184147912817, 1525.2184147912817, 1525.2184147912817, 1525.2184147912817],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169997/1000000 [1:58:50<7:10:21, 32.14it/s]global step 170000, trans_decision ep_re 1317.8458036650197

{"global_step": 170000, "eval_re": [1317.84580366502, 1317.84580366502, 
1317.84580366502, 1317.84580366502, 1317.84580366502, 1317.84580366502, 
1317.84580366502, 1317.84580366502, 1317.84580366502, 1317.84580366502], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179997/1000000 [2:06:00<7:03:15, 32.29it/s]global step 180000, trans_decision ep_re 2995.669713372504

{"global_step": 180000, "eval_re": [3063.763960479895, 3027.9198636425754, 
3063.763960479895, 3063.763960479895, 3063.763960479895, 3063.763960479895, 
3063.763960479895, 2418.665586243295, 3063.763960479895, 3063.763960479895], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189997/1000000 [2:13:10<6:52:00, 32.77it/s]global step 190000, trans_decision ep_re 1422.2370140453697

{"global_step": 190000, "eval_re": [1422.2370140453697, 1422.2370140453697, 
1422.2370140453697, 1422.2370140453697, 1422.2370140453697, 1422.2370140453697, 
1422.2370140453697, 1422.2370140453697, 1422.2370140453697, 1422.2370140453697],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199997/1000000 [2:20:20<6:52:58, 32.29it/s]global step 200000, trans_decision ep_re 1626.8027422047057

{"global_step": 200000, "eval_re": [1626.559785066906, 1626.559785066906, 
1628.9893564449058, 1626.559785066906, 1626.559785066906, 1626.559785066906, 
1626.559785066906, 1626.559785066906, 1626.559785066906, 1626.559785066906], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209997/1000000 [2:27:20<6:43:03, 32.67it/s]global step 210000, trans_decision ep_re 1822.7061740902868

{"global_step": 210000, "eval_re": [1822.7061740902866, 1822.7061740902866, 
1822.7061740902866, 1822.7061740902866, 1822.7061740902866, 1822.7061740902866, 
1822.7061740902866, 1822.7061740902866, 1822.7061740902866, 1822.7061740902866],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219997/1000000 [2:34:30<6:34:12, 32.98it/s]global step 220000, trans_decision ep_re 3122.9108195107738

{"global_step": 220000, "eval_re": [3122.910819510774, 3122.910819510774, 
3122.910819510774, 3122.910819510774, 3122.910819510774, 3122.910819510774, 
3122.910819510774, 3122.910819510774, 3122.910819510774, 3122.910819510774], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229997/1000000 [2:41:40<6:31:10, 32.81it/s]global step 230000, trans_decision ep_re 2225.338220296618

{"global_step": 230000, "eval_re": [2143.5910327443376, 2143.5910327443376, 
2143.5910327443376, 2143.5910327443376, 2143.5910327443376, 2961.0629082671417, 
2143.5910327443376, 2143.5910327443376, 2143.5910327443376, 2143.5910327443376],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239997/1000000 [2:48:40<6:24:17, 32.96it/s]global step 240000, trans_decision ep_re 1471.3798481766312

{"global_step": 240000, "eval_re": [1473.7451779161433, 1473.7451779161433, 
1480.1211187583972, 1473.7451779161433, 1473.7451779161433, 1473.7451779161433, 
1473.7451779161433, 1459.5708678027254, 1473.7451779161433, 1457.8902497921865],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249998/1000000 [2:55:50<6:19:20, 32.95it/s]global step 250000, trans_decision ep_re 2047.8692489066566

{"global_step": 250000, "eval_re": [2050.109091581784, 2050.109091581784, 
2050.109091581784, 2050.109091581784, 2050.109091581784, 2050.109091581784, 
2050.109091581784, 2027.7106648305078, 2050.109091581784, 2050.109091581784], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259997/1000000 [3:03:00<6:19:07, 32.53it/s]global step 260000, trans_decision ep_re 3081.647024077368

{"global_step": 260000, "eval_re": [2460.5147994165272, 3037.9128615728973, 
3037.9128615728973, 3037.9128615728973, 3037.9128615728973, 3037.9128615728973, 
3037.9128615728973, 3037.9128615728973, 3037.9128615728973, 4052.652548773973], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269998/1000000 [3:10:00<6:08:55, 32.98it/s]global step 270000, trans_decision ep_re 1554.6946598044349

{"global_step": 270000, "eval_re": [1554.694659804435, 1554.694659804435, 
1554.694659804435, 1554.694659804435, 1554.694659804435, 1554.694659804435, 
1554.694659804435, 1554.694659804435, 1554.694659804435, 1554.694659804435], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279997/1000000 [3:17:10<6:10:26, 32.39it/s]global step 280000, trans_decision ep_re 2879.9279063301183

{"global_step": 280000, "eval_re": [2879.9279063301183, 2879.9279063301183, 
2879.9279063301183, 2879.9279063301183, 2879.9279063301183, 2879.9279063301183, 
2879.9279063301183, 2879.9279063301183, 2879.9279063301183, 2879.9279063301183],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289997/1000000 [3:24:20<6:00:23, 32.84it/s]global step 290000, trans_decision ep_re 3046.3382992308093

{"global_step": 290000, "eval_re": [2325.6596752872947, 3126.4137018911997, 
3126.4137018911997, 3126.4137018911997, 3126.4137018911997, 3126.4137018911997, 
3126.4137018911997, 3126.4137018911997, 3126.4137018911997, 3126.4137018911997],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299997/1000000 [3:31:20<5:54:36, 32.90it/s]global step 300000, trans_decision ep_re 1677.3417710250021

{"global_step": 300000, "eval_re": [1677.3417710250021, 1677.3417710250021, 
1677.3417710250021, 1677.3417710250021, 1677.3417710250021, 1677.3417710250021, 
1677.3417710250021, 1677.3417710250021, 1677.3417710250021, 1677.3417710250021],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309999/1000000 [3:38:30<5:49:01, 32.95it/s]global step 310000, trans_decision ep_re 1899.245591742604

{"global_step": 310000, "eval_re": [1899.245591742604, 1899.245591742604, 
1899.245591742604, 1899.245591742604, 1899.245591742604, 1899.245591742604, 
1899.245591742604, 1899.245591742604, 1899.245591742604, 1899.245591742604], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319997/1000000 [3:45:30<5:44:40, 32.88it/s]global step 320000, trans_decision ep_re 1439.982835142399

{"global_step": 320000, "eval_re": [1439.982835142399, 1439.982835142399, 
1439.982835142399, 1439.982835142399, 1439.982835142399, 1439.982835142399, 
1439.982835142399, 1439.982835142399, 1439.982835142399, 1439.982835142399], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329999/1000000 [3:52:40<5:39:21, 32.91it/s]global step 330000, trans_decision ep_re 1613.802518097636

{"global_step": 330000, "eval_re": [1613.802518097636, 1613.802518097636, 
1613.802518097636, 1613.802518097636, 1613.802518097636, 1613.802518097636, 
1613.802518097636, 1613.802518097636, 1613.802518097636, 1613.802518097636], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339999/1000000 [3:59:50<5:40:30, 32.30it/s]global step 340000, trans_decision ep_re 2896.493325279553

{"global_step": 340000, "eval_re": [2896.493325279553, 2896.493325279553, 
2896.493325279553, 2896.493325279553, 2896.493325279553, 2896.493325279553, 
2896.493325279553, 2896.493325279553, 2896.493325279553, 2896.493325279553], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349999/1000000 [4:06:50<5:27:57, 33.03it/s]global step 350000, trans_decision ep_re 2919.7257190802457

{"global_step": 350000, "eval_re": [2919.725719080246, 2919.725719080246, 
2919.725719080246, 2919.725719080246, 2919.725719080246, 2919.725719080246, 
2919.725719080246, 2919.725719080246, 2919.725719080246, 2919.725719080246], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359999/1000000 [4:14:00<5:25:14, 32.80it/s]global step 360000, trans_decision ep_re 1832.8865633977243

{"global_step": 360000, "eval_re": [1832.886563397724, 1832.886563397724, 
1832.886563397724, 1832.886563397724, 1832.886563397724, 1832.886563397724, 
1832.886563397724, 1832.886563397724, 1832.886563397724, 1832.886563397724], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369997/1000000 [4:21:10<5:18:08, 33.00it/s]global step 370000, trans_decision ep_re 2221.212460141448

{"global_step": 370000, "eval_re": [2221.2124601414484, 2221.2124601414484, 
2221.2124601414484, 2221.2124601414484, 2221.2124601414484, 2221.2124601414484, 
2221.2124601414484, 2221.2124601414484, 2221.2124601414484, 2221.2124601414484],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379997/1000000 [4:28:10<5:14:25, 32.86it/s]global step 380000, trans_decision ep_re 2261.459410398231

{"global_step": 380000, "eval_re": [2261.459410398231, 2261.459410398231, 
2261.459410398231, 2261.459410398231, 2261.459410398231, 2261.459410398231, 
2261.459410398231, 2261.459410398231, 2261.459410398231, 2261.459410398231], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389997/1000000 [4:35:20<5:10:39, 32.73it/s]global step 390000, trans_decision ep_re 2295.186562890019

{"global_step": 390000, "eval_re": [2295.186562890019, 2295.186562890019, 
2295.186562890019, 2295.186562890019, 2295.186562890019, 2295.186562890019, 
2295.186562890019, 2295.186562890019, 2295.186562890019, 2295.186562890019], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [4:42:30<5:04:58, 32.79it/s]global step 400000, trans_decision ep_re 1462.1885215696352

{"global_step": 400000, "eval_re": [1462.1885215696352, 1462.1885215696352, 
1462.1885215696352, 1462.1885215696352, 1462.1885215696352, 1462.1885215696352, 
1462.1885215696352, 1462.1885215696352, 1462.1885215696352, 1462.1885215696352],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409997/1000000 [4:49:30<4:59:18, 32.85it/s]global step 410000, trans_decision ep_re 1646.9799278013902

{"global_step": 410000, "eval_re": [1646.9799278013904, 1646.9799278013904, 
1646.9799278013904, 1646.9799278013904, 1646.9799278013904, 1646.9799278013904, 
1646.9799278013904, 1646.9799278013904, 1646.9799278013904, 1646.9799278013904],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419997/1000000 [4:56:40<4:56:02, 32.65it/s]global step 420000, trans_decision ep_re 2629.200150542865

{"global_step": 420000, "eval_re": [2629.2001505428652, 2629.2001505428652, 
2629.2001505428652, 2629.2001505428652, 2629.2001505428652, 2629.2001505428652, 
2629.2001505428652, 2629.2001505428652, 2629.2001505428652, 2629.2001505428652],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429997/1000000 [5:03:40<4:47:42, 33.02it/s]global step 430000, trans_decision ep_re 2652.924170194117

{"global_step": 430000, "eval_re": [2652.924170194117, 2652.924170194117, 
2652.924170194117, 2652.924170194117, 2652.924170194117, 2652.924170194117, 
2652.924170194117, 2652.924170194117, 2652.924170194117, 2652.924170194117], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439997/1000000 [5:10:50<4:43:55, 32.87it/s]global step 440000, trans_decision ep_re 2405.795415691987

{"global_step": 440000, "eval_re": [2405.7954156919864, 2405.7954156919864, 
2405.7954156919864, 2405.7954156919864, 2405.7954156919864, 2405.7954156919864, 
2405.7954156919864, 2405.7954156919864, 2405.7954156919864, 2405.7954156919864],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449998/1000000 [5:18:00<4:38:42, 32.89it/s]global step 450000, trans_decision ep_re 2308.317682913076

{"global_step": 450000, "eval_re": [2308.3176829130766, 2308.3176829130766, 
2308.3176829130766, 2308.3176829130766, 2308.3176829130766, 2308.3176829130766, 
2308.3176829130766, 2308.3176829130766, 2308.3176829130766, 2308.3176829130766],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459997/1000000 [5:25:00<4:33:04, 32.96it/s]global step 460000, trans_decision ep_re 2478.534241943941

{"global_step": 460000, "eval_re": [2476.9625945462553, 2476.9625945462553, 
2476.9625945462553, 2476.9625945462553, 2476.9625945462553, 2492.6790685231185, 
2476.9625945462553, 2476.9625945462553, 2476.9625945462553, 2476.9625945462553],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469997/1000000 [5:32:10<4:28:46, 32.87it/s]global step 470000, trans_decision ep_re 3191.2060260507683

{"global_step": 470000, "eval_re": [3191.2060260507683, 3191.2060260507683, 
3191.2060260507683, 3191.2060260507683, 3191.2060260507683, 3191.2060260507683, 
3191.2060260507683, 3191.2060260507683, 3191.2060260507683, 3191.2060260507683],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479997/1000000 [5:39:10<4:22:34, 33.01it/s]global step 480000, trans_decision ep_re 2101.9107281290503

{"global_step": 480000, "eval_re": [2101.9107281290508, 2101.9107281290508, 
2101.9107281290508, 2101.9107281290508, 2101.9107281290508, 2101.9107281290508, 
2101.9107281290508, 2101.9107281290508, 2101.9107281290508, 2101.9107281290508],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489997/1000000 [5:46:20<4:18:37, 32.87it/s]global step 490000, trans_decision ep_re 3081.738396956437

{"global_step": 490000, "eval_re": [3106.2112073028734, 3106.2112073028734, 
3106.2112073028734, 3106.2112073028734, 2861.483103838512, 3106.2112073028734, 
3106.2112073028734, 3106.2112073028734, 3106.2112073028734, 3106.2112073028734],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499997/1000000 [5:53:30<4:13:24, 32.89it/s]global step 500000, trans_decision ep_re 1883.2088881331813

{"global_step": 500000, "eval_re": [1883.2088881331813, 1883.2088881331813, 
1883.2088881331813, 1883.2088881331813, 1883.2088881331813, 1883.2088881331813, 
1883.2088881331813, 1883.2088881331813, 1883.2088881331813, 1883.2088881331813],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509997/1000000 [6:00:30<4:07:41, 32.97it/s]global step 510000, trans_decision ep_re 1276.9854104302472

{"global_step": 510000, "eval_re": [1276.9854104302474, 1276.9854104302474, 
1276.9854104302474, 1276.9854104302474, 1276.9854104302474, 1276.9854104302474, 
1276.9854104302474, 1276.9854104302474, 1276.9854104302474, 1276.9854104302474],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519999/1000000 [6:07:40<4:02:55, 32.93it/s]global step 520000, trans_decision ep_re 2408.9124206663737

{"global_step": 520000, "eval_re": [2408.9124206663737, 2408.9124206663737, 
2408.9124206663737, 2408.9124206663737, 2408.9124206663737, 2408.9124206663737, 
2408.9124206663737, 2408.9124206663737, 2408.9124206663737, 2408.9124206663737],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529997/1000000 [6:14:40<3:58:37, 32.83it/s]global step 530000, trans_decision ep_re 1391.3360894657778

{"global_step": 530000, "eval_re": [1391.3360894657778, 1391.3360894657778, 
1391.3360894657778, 1391.3360894657778, 1391.3360894657778, 1391.3360894657778, 
1391.3360894657778, 1391.3360894657778, 1391.3360894657778, 1391.3360894657778],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539999/1000000 [6:21:50<3:59:29, 32.01it/s]global step 540000, trans_decision ep_re 2247.6379689872606

{"global_step": 540000, "eval_re": [2233.7161933348175, 2233.7161933348175, 
2233.7161933348175, 2372.9339498592512, 2233.7161933348175, 2233.7161933348175, 
2233.7161933348175, 2233.7161933348175, 2233.7161933348175, 2233.7161933348175],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549997/1000000 [6:29:00<3:47:03, 33.03it/s]global step 550000, trans_decision ep_re 3073.9225272002486

{"global_step": 550000, "eval_re": [3142.2302984279227, 3142.2302984279227, 
3142.2302984279227, 3142.2302984279227, 3142.2302984279227, 3142.2302984279227, 
3142.2302984279227, 2459.152586151179, 3142.2302984279227, 3142.2302984279227], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559999/1000000 [6:36:10<3:42:46, 32.92it/s]global step 560000, trans_decision ep_re 2229.9091610266723

{"global_step": 560000, "eval_re": [2229.909161026672, 2229.909161026672, 
2229.909161026672, 2229.909161026672, 2229.909161026672, 2229.909161026672, 
2229.909161026672, 2229.909161026672, 2229.909161026672, 2229.909161026672], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569999/1000000 [6:43:10<3:37:56, 32.88it/s]global step 570000, trans_decision ep_re 1401.0115713209539

{"global_step": 570000, "eval_re": [1401.0115713209539, 1401.0115713209539, 
1401.0115713209539, 1401.0115713209539, 1401.0115713209539, 1401.0115713209539, 
1401.0115713209539, 1401.0115713209539, 1401.0115713209539, 1401.0115713209539],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579999/1000000 [6:50:20<3:32:15, 32.98it/s]global step 580000, trans_decision ep_re 1398.8068101722674

{"global_step": 580000, "eval_re": [1401.4118843561105, 1401.4118843561105, 
1401.4118843561105, 1401.4118843561105, 1401.4118843561105, 1375.3611425176812, 
1401.4118843561105, 1401.4118843561105, 1401.4118843561105, 1401.4118843561105],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589997/1000000 [6:57:20<3:30:51, 32.41it/s]global step 590000, trans_decision ep_re 2676.2479552021696

{"global_step": 590000, "eval_re": [2676.24795520217, 2676.24795520217, 
2676.24795520217, 2676.24795520217, 2676.24795520217, 2676.24795520217, 
2676.24795520217, 2676.24795520217, 2676.24795520217, 2676.24795520217], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599997/1000000 [7:04:30<3:23:07, 32.82it/s]global step 600000, trans_decision ep_re 2339.1998600155553

{"global_step": 600000, "eval_re": [2406.7162728067174, 2406.7162728067174, 
2406.7162728067174, 2406.7162728067174, 2406.7162728067174, 2406.7162728067174, 
2406.7162728067174, 1731.5521448950994, 2406.7162728067174, 2406.7162728067174],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609996/1000000 [7:11:40<3:17:59, 32.83it/s]global step 610000, trans_decision ep_re 2501.7374406264134

{"global_step": 610000, "eval_re": [2501.7374406264134, 2501.7374406264134, 
2501.7374406264134, 2501.7374406264134, 2501.7374406264134, 2501.7374406264134, 
2501.7374406264134, 2501.7374406264134, 2501.7374406264134, 2501.7374406264134],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619999/1000000 [7:18:50<3:12:27, 32.91it/s]global step 620000, trans_decision ep_re 3280.8409579716003

{"global_step": 620000, "eval_re": [3280.8409579716003, 3280.8409579716003, 
3280.8409579716003, 3280.8409579716003, 3280.8409579716003, 3280.8409579716003, 
3280.8409579716003, 3280.8409579716003, 3280.8409579716003, 3280.8409579716003],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629997/1000000 [7:25:50<3:07:56, 32.81it/s]global step 630000, trans_decision ep_re 2406.018503371004

{"global_step": 630000, "eval_re": [2406.018503371004, 2406.018503371004, 
2406.018503371004, 2406.018503371004, 2406.018503371004, 2406.018503371004, 
2406.018503371004, 2406.018503371004, 2406.018503371004, 2406.018503371004], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639998/1000000 [7:33:00<3:01:37, 33.03it/s]global step 640000, trans_decision ep_re 2276.6159037958196

{"global_step": 640000, "eval_re": [2309.7628749637424, 2309.7628749637424, 
2309.7628749637424, 2309.7628749637424, 2309.7628749637424, 2309.7628749637424, 
1978.2931632845102, 2309.7628749637424, 2309.7628749637424, 2309.7628749637424],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649999/1000000 [7:40:00<2:57:21, 32.89it/s]global step 650000, trans_decision ep_re 1695.77982049537

{"global_step": 650000, "eval_re": [1695.188752353525, 1695.188752353525, 
1695.188752353525, 1695.188752353525, 1695.188752353525, 1695.188752353525, 
1695.188752353525, 1695.188752353525, 1701.0994337719753, 1695.188752353525], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659998/1000000 [7:47:10<2:52:21, 32.88it/s]global step 660000, trans_decision ep_re 1339.8419654216912

{"global_step": 660000, "eval_re": [1321.5941379642936, 1534.676026017865, 
1321.5941379642936, 1321.5941379642936, 1321.5941379642936, 1321.5941379642936, 
1321.5941379642936, 1321.5941379642936, 1321.5941379642936, 1290.9905244846973],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669998/1000000 [7:54:10<2:46:57, 32.94it/s]global step 670000, trans_decision ep_re 3015.0700089435372

{"global_step": 670000, "eval_re": [3015.0700089435372, 3015.0700089435372, 
3015.0700089435372, 3015.0700089435372, 3015.0700089435372, 3015.0700089435372, 
3015.0700089435372, 3015.0700089435372, 3015.0700089435372, 3015.0700089435372],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679999/1000000 [8:01:20<2:42:20, 32.85it/s]global step 680000, trans_decision ep_re 2161.8253128832134

{"global_step": 680000, "eval_re": [2161.825312883213, 2161.825312883213, 
2161.825312883213, 2161.825312883213, 2161.825312883213, 2161.825312883213, 
2161.825312883213, 2161.825312883213, 2161.825312883213, 2161.825312883213], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689999/1000000 [8:08:20<2:36:59, 32.91it/s]global step 690000, trans_decision ep_re 1479.5542376525225

{"global_step": 690000, "eval_re": [1479.5542376525225, 1479.5542376525225, 
1479.5542376525225, 1479.5542376525225, 1479.5542376525225, 1479.5542376525225, 
1479.5542376525225, 1479.5542376525225, 1479.5542376525225, 1479.5542376525225],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699997/1000000 [8:15:30<2:31:53, 32.92it/s]global step 700000, trans_decision ep_re 1368.8561676066365

{"global_step": 700000, "eval_re": [1368.8561676066365, 1368.8561676066365, 
1368.8561676066365, 1368.8561676066365, 1368.8561676066365, 1368.8561676066365, 
1368.8561676066365, 1368.8561676066365, 1368.8561676066365, 1368.8561676066365],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709999/1000000 [8:22:30<2:26:36, 32.97it/s]global step 710000, trans_decision ep_re 1846.2865657089665

{"global_step": 710000, "eval_re": [1846.2865657089662, 1846.2865657089662, 
1846.2865657089662, 1846.2865657089662, 1846.2865657089662, 1846.2865657089662, 
1846.2865657089662, 1846.2865657089662, 1846.2865657089662, 1846.2865657089662],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719997/1000000 [8:29:40<2:21:48, 32.91it/s]global step 720000, trans_decision ep_re 1640.2804207037711

{"global_step": 720000, "eval_re": [1636.3153264417067, 1640.7209867328893, 
1640.7209867328893, 1640.7209867328893, 1640.7209867328893, 1640.7209867328893, 
1640.7209867328893, 1640.7209867328893, 1640.7209867328893, 1640.7209867328893],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729999/1000000 [8:36:40<2:16:55, 32.86it/s]global step 730000, trans_decision ep_re 2255.0706833855743

{"global_step": 730000, "eval_re": [2255.2551723787906, 2255.2551723787906, 
2255.2551723787906, 2255.2551723787906, 2255.2551723787906, 2255.2551723787906, 
2255.2551723787906, 2253.4102824466263, 2255.2551723787906, 2255.2551723787906],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739997/1000000 [8:43:50<2:12:42, 32.65it/s]global step 740000, trans_decision ep_re 1587.1288625602247

{"global_step": 740000, "eval_re": [1587.1288625602247, 1587.1288625602247, 
1587.1288625602247, 1587.1288625602247, 1587.1288625602247, 1587.1288625602247, 
1587.1288625602247, 1587.1288625602247, 1587.1288625602247, 1587.1288625602247],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749997/1000000 [8:50:50<2:06:27, 32.95it/s]global step 750000, trans_decision ep_re 1807.267326367703

{"global_step": 750000, "eval_re": [1807.2673263677027, 1807.2673263677027, 
1807.2673263677027, 1807.2673263677027, 1807.2673263677027, 1807.2673263677027, 
1807.2673263677027, 1807.2673263677027, 1807.2673263677027, 1807.2673263677027],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759997/1000000 [8:58:00<2:01:23, 32.95it/s]global step 760000, trans_decision ep_re 2619.0619609908913

{"global_step": 760000, "eval_re": [2619.0619609908917, 2619.0619609908917, 
2619.0619609908917, 2619.0619609908917, 2619.0619609908917, 2619.0619609908917, 
2619.0619609908917, 2619.0619609908917, 2619.0619609908917, 2619.0619609908917],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769999/1000000 [9:05:00<1:56:25, 32.93it/s]global step 770000, trans_decision ep_re 1348.4239435780764

{"global_step": 770000, "eval_re": [1348.4239435780764, 1348.4239435780764, 
1348.4239435780764, 1348.4239435780764, 1348.4239435780764, 1348.4239435780764, 
1348.4239435780764, 1348.4239435780764, 1348.4239435780764, 1348.4239435780764],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779999/1000000 [9:12:10<1:51:33, 32.87it/s]global step 780000, trans_decision ep_re 3201.9213500400106

{"global_step": 780000, "eval_re": [3201.9213500400106, 3201.9213500400106, 
3201.9213500400106, 3201.9213500400106, 3201.9213500400106, 3201.9213500400106, 
3201.9213500400106, 3201.9213500400106, 3201.9213500400106, 3201.9213500400106],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789997/1000000 [9:19:10<1:46:11, 32.96it/s]global step 790000, trans_decision ep_re 2745.7258208549797

{"global_step": 790000, "eval_re": [2745.7258208549797, 2745.7258208549797, 
2745.7258208549797, 2745.7258208549797, 2745.7258208549797, 2745.7258208549797, 
2745.7258208549797, 2745.7258208549797, 2745.7258208549797, 2745.7258208549797],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799999/1000000 [9:26:20<1:41:27, 32.85it/s]global step 800000, trans_decision ep_re 1287.7187171457358

{"global_step": 800000, "eval_re": [1253.4986138540432, 1291.5209508448124, 
1291.5209508448124, 1291.5209508448124, 1291.5209508448124, 1291.5209508448124, 
1291.5209508448124, 1291.5209508448124, 1291.5209508448124, 1291.5209508448124],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809999/1000000 [9:33:20<1:36:11, 32.92it/s]global step 810000, trans_decision ep_re 1824.584333817536

{"global_step": 810000, "eval_re": [1825.845438914361, 1825.845438914361, 
1825.845438914361, 1825.845438914361, 1813.2343879461077, 1825.845438914361, 
1825.845438914361, 1825.845438914361, 1825.845438914361, 1825.845438914361], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819997/1000000 [9:40:30<1:30:57, 32.98it/s]global step 820000, trans_decision ep_re 2961.3617372889817

{"global_step": 820000, "eval_re": [2986.08694648985, 2986.08694648985, 
2986.08694648985, 2986.08694648985, 2986.08694648985, 2986.08694648985, 
2986.08694648985, 2986.08694648985, 2738.834854481166, 2986.08694648985], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829999/1000000 [9:47:30<1:26:15, 32.85it/s]global step 830000, trans_decision ep_re 2893.7294074611536

{"global_step": 830000, "eval_re": [3232.293656023535, 2856.111157620888, 
2856.111157620888, 2856.111157620888, 2856.111157620888, 2856.111157620888, 
2856.111157620888, 2856.111157620888, 2856.111157620888, 2856.111157620888], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839997/1000000 [9:54:40<1:20:56, 32.95it/s]global step 840000, trans_decision ep_re 1485.345712305456

{"global_step": 840000, "eval_re": [1490.1146008343098, 1490.1146008343098, 
1490.1146008343098, 1452.3114396912827, 1490.1146008343098, 1490.1146008343098, 
1490.1146008343098, 1490.1146008343098, 1490.1146008343098, 1480.2288766887975],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849999/1000000 [10:01:40<1:15:47, 32.98it/s]global step 850000, trans_decision ep_re 4684.170645443884

{"global_step": 850000, "eval_re": [4684.170645443884, 4684.170645443884, 
4684.170645443884, 4684.170645443884, 4684.170645443884, 4684.170645443884, 
4684.170645443884, 4684.170645443884, 4684.170645443884, 4684.170645443884], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859997/1000000 [10:08:50<1:11:05, 32.82it/s]global step 860000, trans_decision ep_re 1769.1392126832657

{"global_step": 860000, "eval_re": [1769.1392126832654, 1769.1392126832654, 
1769.1392126832654, 1769.1392126832654, 1769.1392126832654, 1769.1392126832654, 
1769.1392126832654, 1769.1392126832654, 1769.1392126832654, 1769.1392126832654],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869997/1000000 [10:15:50<1:05:41, 32.98it/s]global step 870000, trans_decision ep_re 1303.35646982037

{"global_step": 870000, "eval_re": [1303.35646982037, 1303.35646982037, 
1303.35646982037, 1303.35646982037, 1303.35646982037, 1303.35646982037, 
1303.35646982037, 1303.35646982037, 1303.35646982037, 1303.35646982037], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879999/1000000 [10:23:00<1:00:38, 32.98it/s]global step 880000, trans_decision ep_re 1477.8368098542292

{"global_step": 880000, "eval_re": [1484.242864482926, 1484.242864482926, 
1484.242864482926, 1484.242864482926, 1484.242864482926, 1484.242864482926, 
1484.242864482926, 1484.242864482926, 1484.242864482926, 1420.182318195956], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889999/1000000 [10:30:00<55:40, 32.93it/s]global step 890000, trans_decision ep_re 1378.9956127822281

{"global_step": 890000, "eval_re": [1378.9956127822281, 1378.9956127822281, 
1378.9956127822281, 1378.9956127822281, 1378.9956127822281, 1378.9956127822281, 
1378.9956127822281, 1378.9956127822281, 1378.9956127822281, 1378.9956127822281],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899998/1000000 [10:37:10<50:37, 32.92it/s]global step 900000, trans_decision ep_re 1498.8636280174792

{"global_step": 900000, "eval_re": [1498.863628017479, 1498.863628017479, 
1498.863628017479, 1498.863628017479, 1498.863628017479, 1498.863628017479, 
1498.863628017479, 1498.863628017479, 1498.863628017479, 1498.863628017479], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909999/1000000 [10:44:10<45:23, 33.04it/s]global step 910000, trans_decision ep_re 1452.1321596166867

{"global_step": 910000, "eval_re": [1452.1321596166865, 1452.1321596166865, 
1452.1321596166865, 1452.1321596166865, 1452.1321596166865, 1452.1321596166865, 
1452.1321596166865, 1452.1321596166865, 1452.1321596166865, 1452.1321596166865],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919999/1000000 [10:51:20<40:32, 32.89it/s]global step 920000, trans_decision ep_re 3072.2328473762977

{"global_step": 920000, "eval_re": [3072.2328473762977, 3072.2328473762977, 
3072.2328473762977, 3072.2328473762977, 3072.2328473762977, 3072.2328473762977, 
3072.2328473762977, 3072.2328473762977, 3072.2328473762977, 3072.2328473762977],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929999/1000000 [10:58:30<35:26, 32.92it/s]global step 930000, trans_decision ep_re 1234.0709139298117

{"global_step": 930000, "eval_re": [1234.0709139298115, 1234.0709139298115, 
1234.0709139298115, 1234.0709139298115, 1234.0709139298115, 1234.0709139298115, 
1234.0709139298115, 1234.0709139298115, 1234.0709139298115, 1234.0709139298115],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939999/1000000 [11:05:30<30:25, 32.87it/s]global step 940000, trans_decision ep_re 1414.9304819043496

{"global_step": 940000, "eval_re": [1414.9304819043496, 1414.9304819043496, 
1414.9304819043496, 1414.9304819043496, 1414.9304819043496, 1414.9304819043496, 
1414.9304819043496, 1414.9304819043496, 1414.9304819043496, 1414.9304819043496],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949999/1000000 [11:12:40<25:17, 32.95it/s]global step 950000, trans_decision ep_re 1688.7440324672345

{"global_step": 950000, "eval_re": [1688.43822890614, 1688.43822890614, 
1688.43822890614, 1688.43822890614, 1688.43822890614, 1688.43822890614, 
1688.43822890614, 1688.43822890614, 1691.4962645170863, 1688.43822890614], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959999/1000000 [11:19:40<20:16, 32.89it/s]global step 960000, trans_decision ep_re 1510.7845801502785

{"global_step": 960000, "eval_re": [1510.7845801502783, 1510.7845801502783, 
1510.7845801502783, 1510.7845801502783, 1510.7845801502783, 1510.7845801502783, 
1510.7845801502783, 1510.7845801502783, 1510.7845801502783, 1510.7845801502783],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969999/1000000 [11:26:50<15:10, 32.95it/s]global step 970000, trans_decision ep_re 1804.421824629097

{"global_step": 970000, "eval_re": [1804.4218246290968, 1804.4218246290968, 
1804.4218246290968, 1804.4218246290968, 1804.4218246290968, 1804.4218246290968, 
1804.4218246290968, 1804.4218246290968, 1804.4218246290968, 1804.4218246290968],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979997/1000000 [11:34:00<10:08, 32.89it/s]global step 980000, trans_decision ep_re 1351.4661234134371

{"global_step": 980000, "eval_re": [1351.4661234134373, 1351.4661234134373, 
1351.4661234134373, 1351.4661234134373, 1351.4661234134373, 1351.4661234134373, 
1351.4661234134373, 1351.4661234134373, 1351.4661234134373, 1351.4661234134373],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989999/1000000 [11:41:10<05:04, 32.87it/s]global step 990000, trans_decision ep_re 2439.488438203578

{"global_step": 990000, "eval_re": [2545.1421833394793, 2545.1421833394793, 
1488.6047319804711, 2545.1421833394793, 2545.1421833394793, 2545.1421833394793, 
2545.1421833394793, 2545.1421833394793, 2545.1421833394793, 2545.1421833394793],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999997/1000000 [11:48:20<00:00, 32.91it/s]global step 1000000, trans_decision ep_re 1436.8325376004766

{"global_step": 1000000, "eval_re": [1436.8325376004768, 1436.8325376004768, 
1436.8325376004768, 1436.8325376004768, 1436.8325376004768, 1436.8325376004768, 
1436.8325376004768, 1436.8325376004768, 1436.8325376004768, 1436.8325376004768],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [11:48:45<00:00, 23.52it/s]
