
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.0
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9998/1000000 [04:30<10:26:22, 26.34it/s]global step 10000, trans_decision ep_re 74.25581823012729

{"global_step": 10000, "eval_re": [74.25581823012729, 74.25581823012729, 
74.25581823012729, 74.25581823012729, 74.25581823012729, 74.25581823012729, 
74.25581823012729, 74.25581823012729, 74.25581823012729, 74.25581823012729], 
"eval_len": [58, 58, 58, 58, 58, 58, 58, 58, 58, 58]}

  2%|▏         | 19999/1000000 [12:50<10:43:55, 25.37it/s]global step 20000, trans_decision ep_re 92.86528147538647

{"global_step": 20000, "eval_re": [92.86528147538647, 92.86528147538647, 
92.86528147538647, 92.86528147538647, 92.86528147538647, 92.86528147538647, 
92.86528147538647, 92.86528147538647, 92.86528147538647, 92.86528147538647], 
"eval_len": [71, 71, 71, 71, 71, 71, 71, 71, 71, 71]}

  3%|▎         | 29998/1000000 [21:40<10:20:44, 26.04it/s]global step 30000, trans_decision ep_re 158.70920968310546

{"global_step": 30000, "eval_re": [158.70920968310546, 158.70920968310546, 
158.70920968310546, 158.70920968310546, 158.70920968310546, 158.70920968310546, 
158.70920968310546, 158.70920968310546, 158.70920968310546, 158.70920968310546],
"eval_len": [102, 102, 102, 102, 102, 102, 102, 102, 102, 102]}

  4%|▍         | 39998/1000000 [30:20<10:19:24, 25.83it/s]global step 40000, trans_decision ep_re 426.4353479594888

{"global_step": 40000, "eval_re": [426.4353479594888, 426.4353479594888, 
426.4353479594888, 426.4353479594888, 426.4353479594888, 426.4353479594888, 
426.4353479594888, 426.4353479594888, 426.4353479594888, 426.4353479594888], 
"eval_len": [204, 204, 204, 204, 204, 204, 204, 204, 204, 204]}

  5%|▍         | 49997/1000000 [39:00<10:19:31, 25.56it/s]global step 50000, trans_decision ep_re 425.090189812719

{"global_step": 50000, "eval_re": [425.090189812719, 425.090189812719, 
425.090189812719, 425.090189812719, 425.090189812719, 425.090189812719, 
425.090189812719, 425.090189812719, 425.090189812719, 425.090189812719], 
"eval_len": [190, 190, 190, 190, 190, 190, 190, 190, 190, 190]}

  6%|▌         | 59999/1000000 [47:40<10:12:29, 25.58it/s]global step 60000, trans_decision ep_re 137.05122862469096

{"global_step": 60000, "eval_re": [137.05122862469096, 137.05122862469096, 
137.05122862469096, 137.05122862469096, 137.05122862469096, 137.05122862469096, 
137.05122862469096, 137.05122862469096, 137.05122862469096, 137.05122862469096],
"eval_len": [85, 85, 85, 85, 85, 85, 85, 85, 85, 85]}

  7%|▋         | 69998/1000000 [56:20<9:53:23, 26.12it/s]global step 70000, trans_decision ep_re 424.2145160267504

{"global_step": 70000, "eval_re": [424.2145160267504, 424.2145160267504, 
424.2145160267504, 424.2145160267504, 424.2145160267504, 424.2145160267504, 
424.2145160267504, 424.2145160267504, 424.2145160267504, 424.2145160267504], 
"eval_len": [196, 196, 196, 196, 196, 196, 196, 196, 196, 196]}

  8%|▊         | 79998/1000000 [1:04:42<9:52:32, 25.88it/s] global step 80000, trans_decision ep_re 64.48359489329131

{"global_step": 80000, "eval_re": [64.48359489329131, 64.48359489329131, 
64.48359489329131, 64.48359489329131, 64.48359489329131, 64.48359489329131, 
64.48359489329131, 64.48359489329131, 64.48359489329131, 64.48359489329131], 
"eval_len": [56, 56, 56, 56, 56, 56, 56, 56, 56, 56]}

  9%|▉         | 89999/1000000 [1:13:30<9:48:19, 25.78it/s]global step 90000, trans_decision ep_re 87.2541773430407

{"global_step": 90000, "eval_re": [87.25417734304068, 87.25417734304068, 
87.25417734304068, 87.25417734304068, 87.25417734304068, 87.25417734304068, 
87.25417734304068, 87.25417734304068, 87.25417734304068, 87.25417734304068], 
"eval_len": [67, 67, 67, 67, 67, 67, 67, 67, 67, 67]}

 10%|▉         | 99998/1000000 [1:22:10<9:37:51, 25.96it/s]global step 100000, trans_decision ep_re 883.7787455162367

{"global_step": 100000, "eval_re": [883.7787455162367, 883.7787455162367, 
883.7787455162367, 883.7787455162367, 883.7787455162367, 883.7787455162367, 
883.7787455162367, 883.7787455162367, 883.7787455162367, 883.7787455162367], 
"eval_len": [305, 305, 305, 305, 305, 305, 305, 305, 305, 305]}

 11%|█         | 109999/1000000 [1:31:00<9:36:50, 25.71it/s]global step 110000, trans_decision ep_re 1935.7668195552792

{"global_step": 110000, "eval_re": [1935.7668195552792, 1935.7668195552792, 
1935.7668195552792, 1935.7668195552792, 1935.7668195552792, 1935.7668195552792, 
1935.7668195552792, 1935.7668195552792, 1935.7668195552792, 1935.7668195552792],
"eval_len": [670, 670, 670, 670, 670, 670, 670, 670, 670, 670]}

 12%|█▏        | 119998/1000000 [1:39:50<9:24:15, 25.99it/s]global step 120000, trans_decision ep_re 1545.9320651494158

{"global_step": 120000, "eval_re": [1545.9320651494156, 1545.9320651494156, 
1545.9320651494156, 1545.9320651494156, 1545.9320651494156, 1545.9320651494156, 
1545.9320651494156, 1545.9320651494156, 1545.9320651494156, 1545.9320651494156],
"eval_len": [507, 507, 507, 507, 507, 507, 507, 507, 507, 507]}

 13%|█▎        | 129997/1000000 [1:48:40<9:28:31, 25.50it/s]global step 130000, trans_decision ep_re 84.6366651697542

{"global_step": 130000, "eval_re": [84.6366651697542, 84.6366651697542, 
84.6366651697542, 84.6366651697542, 84.6366651697542, 84.6366651697542, 
84.6366651697542, 84.6366651697542, 84.6366651697542, 84.6366651697542], 
"eval_len": [61, 61, 61, 61, 61, 61, 61, 61, 61, 61]}

 14%|█▍        | 139999/1000000 [1:57:20<9:18:06, 25.68it/s]global step 140000, trans_decision ep_re 1336.7231690469591

{"global_step": 140000, "eval_re": [1336.723169046959, 1336.723169046959, 
1336.723169046959, 1336.723169046959, 1336.723169046959, 1336.723169046959, 
1336.723169046959, 1336.723169046959, 1336.723169046959, 1336.723169046959], 
"eval_len": [411, 411, 411, 411, 411, 411, 411, 411, 411, 411]}

 15%|█▍        | 149998/1000000 [2:06:10<9:12:05, 25.66it/s]global step 150000, trans_decision ep_re 1182.824897865979

{"global_step": 150000, "eval_re": [1182.824897865979, 1182.824897865979, 
1182.824897865979, 1182.824897865979, 1182.824897865979, 1182.824897865979, 
1182.824897865979, 1182.824897865979, 1182.824897865979, 1182.824897865979], 
"eval_len": [357, 357, 357, 357, 357, 357, 357, 357, 357, 357]}

 16%|█▌        | 159997/1000000 [2:14:50<9:04:14, 25.72it/s]global step 160000, trans_decision ep_re 1325.2425009898354

{"global_step": 160000, "eval_re": [1325.2425009898352, 1325.2425009898352, 
1325.2425009898352, 1325.2425009898352, 1325.2425009898352, 1325.2425009898352, 
1325.2425009898352, 1325.2425009898352, 1325.2425009898352, 1325.2425009898352],
"eval_len": [409, 409, 409, 409, 409, 409, 409, 409, 409, 409]}

 17%|█▋        | 169999/1000000 [2:23:40<9:01:37, 25.54it/s]global step 170000, trans_decision ep_re 1035.452426389553

{"global_step": 170000, "eval_re": [1035.452426389553, 1035.452426389553, 
1035.452426389553, 1035.452426389553, 1035.452426389553, 1035.452426389553, 
1035.452426389553, 1035.452426389553, 1035.452426389553, 1035.452426389553], 
"eval_len": [332, 332, 332, 332, 332, 332, 332, 332, 332, 332]}

 18%|█▊        | 179999/1000000 [2:32:20<8:57:47, 25.41it/s]global step 180000, trans_decision ep_re 247.5395141205516

{"global_step": 180000, "eval_re": [247.53951412055156, 247.53951412055156, 
247.53951412055156, 247.53951412055156, 247.53951412055156, 247.53951412055156, 
247.53951412055156, 247.53951412055156, 247.53951412055156, 247.53951412055156],
"eval_len": [123, 123, 123, 123, 123, 123, 123, 123, 123, 123]}

 19%|█▉        | 189998/1000000 [2:41:00<8:37:59, 26.06it/s]global step 190000, trans_decision ep_re 1179.3625607961214

{"global_step": 190000, "eval_re": [1179.3625607961212, 1179.3625607961212, 
1179.3625607961212, 1179.3625607961212, 1179.3625607961212, 1179.3625607961212, 
1179.3625607961212, 1179.3625607961212, 1179.3625607961212, 1179.3625607961212],
"eval_len": [352, 352, 352, 352, 352, 352, 352, 352, 352, 352]}

 20%|█▉        | 199998/1000000 [2:49:50<8:34:40, 25.91it/s]global step 200000, trans_decision ep_re 484.8181927752228

{"global_step": 200000, "eval_re": [484.8181927752228, 484.8181927752228, 
484.8181927752228, 484.8181927752228, 484.8181927752228, 484.8181927752228, 
484.8181927752228, 484.8181927752228, 484.8181927752228, 484.8181927752228], 
"eval_len": [193, 193, 193, 193, 193, 193, 193, 193, 193, 193]}

 21%|██        | 209998/1000000 [2:58:12<8:26:51, 25.98it/s]global step 210000, trans_decision ep_re 343.7903315943372

{"global_step": 210000, "eval_re": [343.7903315943372, 343.7903315943372, 
343.7903315943372, 343.7903315943372, 343.7903315943372, 343.7903315943372, 
343.7903315943372, 343.7903315943372, 343.7903315943372, 343.7903315943372], 
"eval_len": [148, 148, 148, 148, 148, 148, 148, 148, 148, 148]}

 22%|██▏       | 219999/1000000 [3:06:51<8:25:38, 25.71it/s]global step 220000, trans_decision ep_re 160.29447542510775

{"global_step": 220000, "eval_re": [160.29447542510775, 160.29447542510775, 
160.29447542510775, 160.29447542510775, 160.29447542510775, 160.29447542510775, 
160.29447542510775, 160.29447542510775, 160.29447542510775, 160.29447542510775],
"eval_len": [95, 95, 95, 95, 95, 95, 95, 95, 95, 95]}

 23%|██▎       | 229997/1000000 [3:15:40<8:19:17, 25.70it/s]global step 230000, trans_decision ep_re 774.175836670769

{"global_step": 230000, "eval_re": [774.175836670769, 774.175836670769, 
774.175836670769, 774.175836670769, 774.175836670769, 774.175836670769, 
774.175836670769, 774.175836670769, 774.175836670769, 774.175836670769], 
"eval_len": [274, 274, 274, 274, 274, 274, 274, 274, 274, 274]}

 24%|██▍       | 239998/1000000 [3:24:30<8:06:11, 26.05it/s]global step 240000, trans_decision ep_re 1028.8515731898683

{"global_step": 240000, "eval_re": [1028.8515731898683, 1028.8515731898683, 
1028.8515731898683, 1028.8515731898683, 1028.8515731898683, 1028.8515731898683, 
1028.8515731898683, 1028.8515731898683, 1028.8515731898683, 1028.8515731898683],
"eval_len": [319, 319, 319, 319, 319, 319, 319, 319, 319, 319]}

 25%|██▍       | 249998/1000000 [3:33:10<8:01:17, 25.97it/s]global step 250000, trans_decision ep_re 155.5454115306723

{"global_step": 250000, "eval_re": [155.54541153067234, 155.54541153067234, 
155.54541153067234, 155.54541153067234, 155.54541153067234, 155.54541153067234, 
155.54541153067234, 155.54541153067234, 155.54541153067234, 155.54541153067234],
"eval_len": [94, 94, 94, 94, 94, 94, 94, 94, 94, 94]}

 26%|██▌       | 259998/1000000 [3:41:31<7:50:16, 26.23it/s]global step 260000, trans_decision ep_re 256.6753431761676

{"global_step": 260000, "eval_re": [256.6753431761676, 256.6753431761676, 
256.6753431761676, 256.6753431761676, 256.6753431761676, 256.6753431761676, 
256.6753431761676, 256.6753431761676, 256.6753431761676, 256.6753431761676], 
"eval_len": [126, 126, 126, 126, 126, 126, 126, 126, 126, 126]}

 27%|██▋       | 269998/1000000 [3:50:20<7:42:40, 26.30it/s]global step 270000, trans_decision ep_re 178.8735566448235

{"global_step": 270000, "eval_re": [178.87355664482348, 178.87355664482348, 
178.87355664482348, 178.87355664482348, 178.87355664482348, 178.87355664482348, 
178.87355664482348, 178.87355664482348, 178.87355664482348, 178.87355664482348],
"eval_len": [102, 102, 102, 102, 102, 102, 102, 102, 102, 102]}

 28%|██▊       | 279999/1000000 [3:58:50<7:44:40, 25.82it/s]global step 280000, trans_decision ep_re 297.4328802589198

{"global_step": 280000, "eval_re": [297.4328802589198, 297.4328802589198, 
297.4328802589198, 297.4328802589198, 297.4328802589198, 297.4328802589198, 
297.4328802589198, 297.4328802589198, 297.4328802589198, 297.4328802589198], 
"eval_len": [137, 137, 137, 137, 137, 137, 137, 137, 137, 137]}

 29%|██▉       | 289997/1000000 [4:07:12<7:36:41, 25.91it/s]global step 290000, trans_decision ep_re 156.93341336776848

{"global_step": 290000, "eval_re": [156.93341336776848, 156.93341336776848, 
156.93341336776848, 156.93341336776848, 156.93341336776848, 156.93341336776848, 
156.93341336776848, 156.93341336776848, 156.93341336776848, 156.93341336776848],
"eval_len": [94, 94, 94, 94, 94, 94, 94, 94, 94, 94]}

 30%|██▉       | 299998/1000000 [4:16:00<7:27:49, 26.05it/s]global step 300000, trans_decision ep_re 1014.8346266120864

{"global_step": 300000, "eval_re": [1014.8346266120863, 1014.8346266120863, 
1014.8346266120863, 1014.8346266120863, 1014.8346266120863, 1014.8346266120863, 
1014.8346266120863, 1014.8346266120863, 1014.8346266120863, 1014.8346266120863],
"eval_len": [319, 319, 319, 319, 319, 319, 319, 319, 319, 319]}

 31%|███       | 309997/1000000 [4:24:40<7:23:09, 25.95it/s]global step 310000, trans_decision ep_re 1193.0109775196556

{"global_step": 310000, "eval_re": [1193.0109775196554, 1193.0109775196554, 
1193.0109775196554, 1193.0109775196554, 1193.0109775196554, 1193.0109775196554, 
1193.0109775196554, 1193.0109775196554, 1193.0109775196554, 1193.0109775196554],
"eval_len": [363, 363, 363, 363, 363, 363, 363, 363, 363, 363]}

 32%|███▏      | 319997/1000000 [4:33:20<7:16:53, 25.94it/s]global step 320000, trans_decision ep_re 978.9533009576269

{"global_step": 320000, "eval_re": [978.953300957627, 978.953300957627, 
978.953300957627, 978.953300957627, 978.953300957627, 978.953300957627, 
978.953300957627, 978.953300957627, 978.953300957627, 978.953300957627], 
"eval_len": [333, 333, 333, 333, 333, 333, 333, 333, 333, 333]}

 33%|███▎      | 329998/1000000 [4:42:00<7:08:11, 26.08it/s]global step 330000, trans_decision ep_re 1010.5137211447578

{"global_step": 330000, "eval_re": [1010.5137211447577, 1010.5137211447577, 
1010.5137211447577, 1010.5137211447577, 1010.5137211447577, 1010.5137211447577, 
1010.5137211447577, 1010.5137211447577, 1010.5137211447577, 1010.5137211447577],
"eval_len": [355, 355, 355, 355, 355, 355, 355, 355, 355, 355]}

 34%|███▍      | 339999/1000000 [4:50:40<7:02:06, 26.06it/s]global step 340000, trans_decision ep_re 1557.383013923872

{"global_step": 340000, "eval_re": [1557.3830139238719, 1557.3830139238719, 
1557.3830139238719, 1557.3830139238719, 1557.3830139238719, 1557.3830139238719, 
1557.3830139238719, 1557.3830139238719, 1557.3830139238719, 1557.3830139238719],
"eval_len": [472, 472, 472, 472, 472, 472, 472, 472, 472, 472]}

 35%|███▍      | 349997/1000000 [4:59:20<6:59:46, 25.81it/s]global step 350000, trans_decision ep_re 926.2678104556655

{"global_step": 350000, "eval_re": [926.2678104556655, 926.2678104556655, 
926.2678104556655, 926.2678104556655, 926.2678104556655, 926.2678104556655, 
926.2678104556655, 926.2678104556655, 926.2678104556655, 926.2678104556655], 
"eval_len": [276, 276, 276, 276, 276, 276, 276, 276, 276, 276]}

 36%|███▌      | 359999/1000000 [5:08:00<6:48:49, 26.09it/s]global step 360000, trans_decision ep_re 1069.0124927269135

{"global_step": 360000, "eval_re": [1069.0124927269135, 1069.0124927269135, 
1069.0124927269135, 1069.0124927269135, 1069.0124927269135, 1069.0124927269135, 
1069.0124927269135, 1069.0124927269135, 1069.0124927269135, 1069.0124927269135],
"eval_len": [357, 357, 357, 357, 357, 357, 357, 357, 357, 357]}

 37%|███▋      | 369999/1000000 [5:16:40<6:42:49, 26.07it/s]global step 370000, trans_decision ep_re 3351.2180925505577

{"global_step": 370000, "eval_re": [3351.2180925505577, 3351.2180925505577, 
3351.2180925505577, 3351.2180925505577, 3351.2180925505577, 3351.2180925505577, 
3351.2180925505577, 3351.2180925505577, 3351.2180925505577, 3351.2180925505577],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379997/1000000 [5:25:40<6:38:51, 25.91it/s]global step 380000, trans_decision ep_re 362.2376660083295

{"global_step": 380000, "eval_re": [362.2376660083295, 362.2376660083295, 
362.2376660083295, 362.2376660083295, 362.2376660083295, 362.2376660083295, 
362.2376660083295, 362.2376660083295, 362.2376660083295, 362.2376660083295], 
"eval_len": [157, 157, 157, 157, 157, 157, 157, 157, 157, 157]}

 39%|███▉      | 389997/1000000 [5:34:20<6:37:55, 25.55it/s]global step 390000, trans_decision ep_re 3359.4798661258114

{"global_step": 390000, "eval_re": [3359.4798661258114, 3359.4798661258114, 
3359.4798661258114, 3359.4798661258114, 3359.4798661258114, 3359.4798661258114, 
3359.4798661258114, 3359.4798661258114, 3359.4798661258114, 3359.4798661258114],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399997/1000000 [5:43:20<6:27:22, 25.82it/s]global step 400000, trans_decision ep_re 3196.76832139444

{"global_step": 400000, "eval_re": [3196.76832139444, 3196.76832139444, 
3196.76832139444, 3196.76832139444, 3196.76832139444, 3196.76832139444, 
3196.76832139444, 3196.76832139444, 3196.76832139444, 3196.76832139444], 
"eval_len": [909, 909, 909, 909, 909, 909, 909, 909, 909, 909]}

 41%|████      | 409999/1000000 [5:52:20<6:20:02, 25.87it/s]global step 410000, trans_decision ep_re 757.2121380311568

{"global_step": 410000, "eval_re": [757.2121380311569, 757.2121380311569, 
757.2121380311569, 757.2121380311569, 757.2121380311569, 757.2121380311569, 
757.2121380311569, 757.2121380311569, 757.2121380311569, 757.2121380311569], 
"eval_len": [256, 256, 256, 256, 256, 256, 256, 256, 256, 256]}

 42%|████▏     | 419997/1000000 [6:01:00<6:15:42, 25.73it/s]global step 420000, trans_decision ep_re 373.37172999635084

{"global_step": 420000, "eval_re": [373.3717299963508, 373.3717299963508, 
373.3717299963508, 373.3717299963508, 373.3717299963508, 373.3717299963508, 
373.3717299963508, 373.3717299963508, 373.3717299963508, 373.3717299963508], 
"eval_len": [157, 157, 157, 157, 157, 157, 157, 157, 157, 157]}

 43%|████▎     | 429998/1000000 [6:09:30<6:02:27, 26.21it/s]global step 430000, trans_decision ep_re 3414.6572167810978

{"global_step": 430000, "eval_re": [3414.6572167810978, 3414.6572167810978, 
3414.6572167810978, 3414.6572167810978, 3414.6572167810978, 3414.6572167810978, 
3414.6572167810978, 3414.6572167810978, 3414.6572167810978, 3414.6572167810978],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439999/1000000 [6:18:30<6:00:40, 25.88it/s]global step 440000, trans_decision ep_re 1960.2347430792845

{"global_step": 440000, "eval_re": [1960.2347430792847, 1960.2347430792847, 
1960.2347430792847, 1960.2347430792847, 1960.2347430792847, 1960.2347430792847, 
1960.2347430792847, 1960.2347430792847, 1960.2347430792847, 1960.2347430792847],
"eval_len": [588, 588, 588, 588, 588, 588, 588, 588, 588, 588]}

 45%|████▍     | 449998/1000000 [6:27:20<5:51:12, 26.10it/s]global step 450000, trans_decision ep_re 878.1438053321359

{"global_step": 450000, "eval_re": [878.1438053321359, 878.1438053321359, 
878.1438053321359, 878.1438053321359, 878.1438053321359, 878.1438053321359, 
878.1438053321359, 878.1438053321359, 878.1438053321359, 878.1438053321359], 
"eval_len": [255, 255, 255, 255, 255, 255, 255, 255, 255, 255]}

 46%|████▌     | 459997/1000000 [6:36:00<5:48:42, 25.81it/s]global step 460000, trans_decision ep_re 540.3382386821897

{"global_step": 460000, "eval_re": [540.3382386821897, 540.3382386821897, 
540.3382386821897, 540.3382386821897, 540.3382386821897, 540.3382386821897, 
540.3382386821897, 540.3382386821897, 540.3382386821897, 540.3382386821897], 
"eval_len": [207, 207, 207, 207, 207, 207, 207, 207, 207, 207]}

 47%|████▋     | 469998/1000000 [6:44:40<5:39:17, 26.03it/s]global step 470000, trans_decision ep_re 3372.7854079648487

{"global_step": 470000, "eval_re": [3372.785407964849, 3372.785407964849, 
3372.785407964849, 3372.785407964849, 3372.785407964849, 3372.785407964849, 
3372.785407964849, 3372.785407964849, 3372.785407964849, 3372.785407964849], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479998/1000000 [6:53:40<5:29:14, 26.32it/s]global step 480000, trans_decision ep_re 652.0209436707785

{"global_step": 480000, "eval_re": [652.0209436707786, 652.0209436707786, 
652.0209436707786, 652.0209436707786, 652.0209436707786, 652.0209436707786, 
652.0209436707786, 652.0209436707786, 652.0209436707786, 652.0209436707786], 
"eval_len": [205, 205, 205, 205, 205, 205, 205, 205, 205, 205]}

 49%|████▉     | 489998/1000000 [7:02:20<5:23:45, 26.25it/s]global step 490000, trans_decision ep_re 237.30809012185787

{"global_step": 490000, "eval_re": [237.30809012185787, 237.30809012185787, 
237.30809012185787, 237.30809012185787, 237.30809012185787, 237.30809012185787, 
237.30809012185787, 237.30809012185787, 237.30809012185787, 237.30809012185787],
"eval_len": [119, 119, 119, 119, 119, 119, 119, 119, 119, 119]}

 50%|████▉     | 499999/1000000 [7:10:50<5:22:14, 25.86it/s]global step 500000, trans_decision ep_re 283.55674303180274

{"global_step": 500000, "eval_re": [283.5567430318027, 283.5567430318027, 
283.5567430318027, 283.5567430318027, 283.5567430318027, 283.5567430318027, 
283.5567430318027, 283.5567430318027, 283.5567430318027, 283.5567430318027], 
"eval_len": [141, 141, 141, 141, 141, 141, 141, 141, 141, 141]}

 51%|█████     | 509997/1000000 [7:19:30<5:17:30, 25.72it/s]global step 510000, trans_decision ep_re 2142.7134882079768

{"global_step": 510000, "eval_re": [2142.7134882079768, 2142.7134882079768, 
2142.7134882079768, 2142.7134882079768, 2142.7134882079768, 2142.7134882079768, 
2142.7134882079768, 2142.7134882079768, 2142.7134882079768, 2142.7134882079768],
"eval_len": [659, 659, 659, 659, 659, 659, 659, 659, 659, 659]}

 52%|█████▏    | 519999/1000000 [7:28:20<5:10:47, 25.74it/s]global step 520000, trans_decision ep_re 425.2636753764149

{"global_step": 520000, "eval_re": [425.2636753764149, 425.2636753764149, 
425.2636753764149, 425.2636753764149, 425.2636753764149, 425.2636753764149, 
425.2636753764149, 425.2636753764149, 425.2636753764149, 425.2636753764149], 
"eval_len": [172, 172, 172, 172, 172, 172, 172, 172, 172, 172]}

 53%|█████▎    | 529997/1000000 [7:37:00<5:01:47, 25.96it/s]global step 530000, trans_decision ep_re 1027.0772901041826

{"global_step": 530000, "eval_re": [1027.0772901041828, 1027.0772901041828, 
1027.0772901041828, 1027.0772901041828, 1027.0772901041828, 1027.0772901041828, 
1027.0772901041828, 1027.0772901041828, 1027.0772901041828, 1027.0772901041828],
"eval_len": [289, 289, 289, 289, 289, 289, 289, 289, 289, 289]}

 54%|█████▍    | 539999/1000000 [7:45:23<4:57:39, 25.76it/s]global step 540000, trans_decision ep_re 80.47951936463207

{"global_step": 540000, "eval_re": [80.47951936463205, 80.47951936463205, 
80.47951936463205, 80.47951936463205, 80.47951936463205, 80.47951936463205, 
80.47951936463205, 80.47951936463205, 80.47951936463205, 80.47951936463205], 
"eval_len": [73, 73, 73, 73, 73, 73, 73, 73, 73, 73]}

 55%|█████▍    | 549997/1000000 [7:54:10<4:49:08, 25.94it/s]global step 550000, trans_decision ep_re 586.4092992451857

{"global_step": 550000, "eval_re": [586.4092992451856, 586.4092992451856, 
586.4092992451856, 586.4092992451856, 586.4092992451856, 586.4092992451856, 
586.4092992451856, 586.4092992451856, 586.4092992451856, 586.4092992451856], 
"eval_len": [214, 214, 214, 214, 214, 214, 214, 214, 214, 214]}

 56%|█████▌    | 559999/1000000 [8:02:50<4:39:24, 26.25it/s]global step 560000, trans_decision ep_re 989.8214511799218

{"global_step": 560000, "eval_re": [989.8214511799218, 989.8214511799218, 
989.8214511799218, 989.8214511799218, 989.8214511799218, 989.8214511799218, 
989.8214511799218, 989.8214511799218, 989.8214511799218, 989.8214511799218], 
"eval_len": [289, 289, 289, 289, 289, 289, 289, 289, 289, 289]}

 57%|█████▋    | 569998/1000000 [8:11:20<4:28:27, 26.70it/s]global step 570000, trans_decision ep_re 1247.276279898268

{"global_step": 570000, "eval_re": [1247.2762798982683, 1247.2762798982683, 
1247.2762798982683, 1247.2762798982683, 1247.2762798982683, 1247.2762798982683, 
1247.2762798982683, 1247.2762798982683, 1247.2762798982683, 1247.2762798982683],
"eval_len": [364, 364, 364, 364, 364, 364, 364, 364, 364, 364]}

 58%|█████▊    | 579998/1000000 [8:19:50<4:23:49, 26.53it/s]global step 580000, trans_decision ep_re 1544.33278484389

{"global_step": 580000, "eval_re": [1544.3327848438903, 1544.3327848438903, 
1544.3327848438903, 1544.3327848438903, 1544.3327848438903, 1544.3327848438903, 
1544.3327848438903, 1544.3327848438903, 1544.3327848438903, 1544.3327848438903],
"eval_len": [422, 422, 422, 422, 422, 422, 422, 422, 422, 422]}

 59%|█████▉    | 589998/1000000 [8:28:30<4:15:28, 26.75it/s]global step 590000, trans_decision ep_re 2916.6606321053505

{"global_step": 590000, "eval_re": [2916.66063210535, 2916.66063210535, 
2916.66063210535, 2916.66063210535, 2916.66063210535, 2916.66063210535, 
2916.66063210535, 2916.66063210535, 2916.66063210535, 2916.66063210535], 
"eval_len": [854, 854, 854, 854, 854, 854, 854, 854, 854, 854]}

 60%|█████▉    | 599998/1000000 [8:37:20<4:14:35, 26.19it/s]global step 600000, trans_decision ep_re 1020.3762020741182

{"global_step": 600000, "eval_re": [1020.3762020741183, 1020.3762020741183, 
1020.3762020741183, 1020.3762020741183, 1020.3762020741183, 1020.3762020741183, 
1020.3762020741183, 1020.3762020741183, 1020.3762020741183, 1020.3762020741183],
"eval_len": [297, 297, 297, 297, 297, 297, 297, 297, 297, 297]}

 61%|██████    | 609998/1000000 [8:45:50<4:04:44, 26.56it/s]global step 610000, trans_decision ep_re 563.7836218524691

{"global_step": 610000, "eval_re": [563.7836218524691, 563.7836218524691, 
563.7836218524691, 563.7836218524691, 563.7836218524691, 563.7836218524691, 
563.7836218524691, 563.7836218524691, 563.7836218524691, 563.7836218524691], 
"eval_len": [203, 203, 203, 203, 203, 203, 203, 203, 203, 203]}

 62%|██████▏   | 619998/1000000 [8:54:20<3:58:14, 26.58it/s]global step 620000, trans_decision ep_re 332.4880559580821

{"global_step": 620000, "eval_re": [332.4880559580821, 332.4880559580821, 
332.4880559580821, 332.4880559580821, 332.4880559580821, 332.4880559580821, 
332.4880559580821, 332.4880559580821, 332.4880559580821, 332.4880559580821], 
"eval_len": [142, 142, 142, 142, 142, 142, 142, 142, 142, 142]}

 63%|██████▎   | 629998/1000000 [9:02:50<3:52:45, 26.49it/s]global step 630000, trans_decision ep_re 3289.45373514957

{"global_step": 630000, "eval_re": [3289.4537351495705, 3289.4537351495705, 
3289.4537351495705, 3289.4537351495705, 3289.4537351495705, 3289.4537351495705, 
3289.4537351495705, 3289.4537351495705, 3289.4537351495705, 3289.4537351495705],
"eval_len": [947, 947, 947, 947, 947, 947, 947, 947, 947, 947]}

 64%|██████▍   | 639998/1000000 [9:11:40<3:45:46, 26.58it/s]global step 640000, trans_decision ep_re 515.1099588352489

{"global_step": 640000, "eval_re": [515.1099588352489, 515.1099588352489, 
515.1099588352489, 515.1099588352489, 515.1099588352489, 515.1099588352489, 
515.1099588352489, 515.1099588352489, 515.1099588352489, 515.1099588352489], 
"eval_len": [193, 193, 193, 193, 193, 193, 193, 193, 193, 193]}

 65%|██████▍   | 649998/1000000 [9:20:10<3:39:01, 26.63it/s]global step 650000, trans_decision ep_re 3434.2242511511226

{"global_step": 650000, "eval_re": [3434.224251151123, 3434.224251151123, 
3434.224251151123, 3434.224251151123, 3434.224251151123, 3434.224251151123, 
3434.224251151123, 3434.224251151123, 3434.224251151123, 3434.224251151123], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659999/1000000 [9:29:10<3:35:39, 26.28it/s]global step 660000, trans_decision ep_re 3477.6091912487527

{"global_step": 660000, "eval_re": [3477.6091912487527, 3477.6091912487527, 
3477.6091912487527, 3477.6091912487527, 3477.6091912487527, 3477.6091912487527, 
3477.6091912487527, 3477.6091912487527, 3477.6091912487527, 3477.6091912487527],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669998/1000000 [9:38:00<3:25:33, 26.76it/s]global step 670000, trans_decision ep_re 3447.9522505616396

{"global_step": 670000, "eval_re": [3447.95225056164, 3447.95225056164, 
3447.95225056164, 3447.95225056164, 3447.95225056164, 3447.95225056164, 
3447.95225056164, 3447.95225056164, 3447.95225056164, 3447.95225056164], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679999/1000000 [9:47:00<3:22:13, 26.37it/s]global step 680000, trans_decision ep_re 3393.8756578989983

{"global_step": 680000, "eval_re": [3393.875657898998, 3393.875657898998, 
3393.875657898998, 3393.875657898998, 3393.875657898998, 3393.875657898998, 
3393.875657898998, 3393.875657898998, 3393.875657898998, 3393.875657898998], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689998/1000000 [9:55:50<3:13:12, 26.74it/s]global step 690000, trans_decision ep_re 3368.982585338171

{"global_step": 690000, "eval_re": [3368.9825853381703, 3368.9825853381703, 
3368.9825853381703, 3368.9825853381703, 3368.9825853381703, 3368.9825853381703, 
3368.9825853381703, 3368.9825853381703, 3368.9825853381703, 3368.9825853381703],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699999/1000000 [10:04:40<3:09:42, 26.36it/s]global step 700000, trans_decision ep_re 2911.343925692242

{"global_step": 700000, "eval_re": [2911.3439256922416, 2911.3439256922416, 
2911.3439256922416, 2911.3439256922416, 2911.3439256922416, 2911.3439256922416, 
2911.3439256922416, 2911.3439256922416, 2911.3439256922416, 2911.3439256922416],
"eval_len": [809, 809, 809, 809, 809, 809, 809, 809, 809, 809]}

 71%|███████   | 709998/1000000 [10:13:30<3:01:04, 26.69it/s]global step 710000, trans_decision ep_re 971.3624513208822

{"global_step": 710000, "eval_re": [971.3624513208824, 971.3624513208824, 
971.3624513208824, 971.3624513208824, 971.3624513208824, 971.3624513208824, 
971.3624513208824, 971.3624513208824, 971.3624513208824, 971.3624513208824], 
"eval_len": [290, 290, 290, 290, 290, 290, 290, 290, 290, 290]}

 72%|███████▏  | 719998/1000000 [10:22:00<2:54:53, 26.68it/s]global step 720000, trans_decision ep_re 229.9656512253758

{"global_step": 720000, "eval_re": [229.96565122537578, 229.96565122537578, 
229.96565122537578, 229.96565122537578, 229.96565122537578, 229.96565122537578, 
229.96565122537578, 229.96565122537578, 229.96565122537578, 229.96565122537578],
"eval_len": [121, 121, 121, 121, 121, 121, 121, 121, 121, 121]}

 73%|███████▎  | 729999/1000000 [10:30:30<2:53:05, 26.00it/s]global step 730000, trans_decision ep_re 2257.277434393084

{"global_step": 730000, "eval_re": [2257.2774343930846, 2257.2774343930846, 
2257.2774343930846, 2257.2774343930846, 2257.2774343930846, 2257.2774343930846, 
2257.2774343930846, 2257.2774343930846, 2257.2774343930846, 2257.2774343930846],
"eval_len": [621, 621, 621, 621, 621, 621, 621, 621, 621, 621]}

 74%|███████▍  | 739998/1000000 [10:39:01<2:43:06, 26.57it/s]global step 740000, trans_decision ep_re 522.5367318215849

{"global_step": 740000, "eval_re": [522.5367318215849, 522.5367318215849, 
522.5367318215849, 522.5367318215849, 522.5367318215849, 522.5367318215849, 
522.5367318215849, 522.5367318215849, 522.5367318215849, 522.5367318215849], 
"eval_len": [193, 193, 193, 193, 193, 193, 193, 193, 193, 193]}

 75%|███████▍  | 749997/1000000 [10:47:50<2:39:30, 26.12it/s]global step 750000, trans_decision ep_re 403.459895744172

{"global_step": 750000, "eval_re": [403.45989574417194, 403.45989574417194, 
403.45989574417194, 403.45989574417194, 403.45989574417194, 403.45989574417194, 
403.45989574417194, 403.45989574417194, 403.45989574417194, 403.45989574417194],
"eval_len": [163, 163, 163, 163, 163, 163, 163, 163, 163, 163]}

 76%|███████▌  | 759999/1000000 [10:56:20<2:33:18, 26.09it/s]global step 760000, trans_decision ep_re 1711.0233885517396

{"global_step": 760000, "eval_re": [1711.0233885517396, 1711.0233885517396, 
1711.0233885517396, 1711.0233885517396, 1711.0233885517396, 1711.0233885517396, 
1711.0233885517396, 1711.0233885517396, 1711.0233885517396, 1711.0233885517396],
"eval_len": [473, 473, 473, 473, 473, 473, 473, 473, 473, 473]}

 77%|███████▋  | 769998/1000000 [11:05:00<2:24:24, 26.55it/s]global step 770000, trans_decision ep_re 1765.2683939742124

{"global_step": 770000, "eval_re": [1765.2683939742126, 1765.2683939742126, 
1765.2683939742126, 1765.2683939742126, 1765.2683939742126, 1765.2683939742126, 
1765.2683939742126, 1765.2683939742126, 1765.2683939742126, 1765.2683939742126],
"eval_len": [474, 474, 474, 474, 474, 474, 474, 474, 474, 474]}

 78%|███████▊  | 779998/1000000 [11:13:40<2:17:43, 26.62it/s]global step 780000, trans_decision ep_re 915.7812609672632

{"global_step": 780000, "eval_re": [915.7812609672632, 915.7812609672632, 
915.7812609672632, 915.7812609672632, 915.7812609672632, 915.7812609672632, 
915.7812609672632, 915.7812609672632, 915.7812609672632, 915.7812609672632], 
"eval_len": [306, 306, 306, 306, 306, 306, 306, 306, 306, 306]}

 79%|███████▉  | 789998/1000000 [11:22:20<2:12:09, 26.48it/s]global step 790000, trans_decision ep_re 3451.8729791788887

{"global_step": 790000, "eval_re": [3451.8729791788887, 3451.8729791788887, 
3451.8729791788887, 3451.8729791788887, 3451.8729791788887, 3451.8729791788887, 
3451.8729791788887, 3451.8729791788887, 3451.8729791788887, 3451.8729791788887],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799998/1000000 [11:31:20<2:05:40, 26.52it/s]global step 800000, trans_decision ep_re 3135.5844544022957

{"global_step": 800000, "eval_re": [3135.584454402296, 3135.584454402296, 
3135.584454402296, 3135.584454402296, 3135.584454402296, 3135.584454402296, 
3135.584454402296, 3135.584454402296, 3135.584454402296, 3135.584454402296], 
"eval_len": [916, 916, 916, 916, 916, 916, 916, 916, 916, 916]}

 81%|████████  | 809998/1000000 [11:40:10<1:59:52, 26.42it/s]global step 810000, trans_decision ep_re 2687.140704215657

{"global_step": 810000, "eval_re": [2687.140704215657, 2687.140704215657, 
2687.140704215657, 2687.140704215657, 2687.140704215657, 2687.140704215657, 
2687.140704215657, 2687.140704215657, 2687.140704215657, 2687.140704215657], 
"eval_len": [702, 702, 702, 702, 702, 702, 702, 702, 702, 702]}

 82%|████████▏ | 819999/1000000 [11:49:00<1:54:39, 26.17it/s]global step 820000, trans_decision ep_re 2818.6997393584384

{"global_step": 820000, "eval_re": [2818.6997393584384, 2818.6997393584384, 
2818.6997393584384, 2818.6997393584384, 2818.6997393584384, 2818.6997393584384, 
2818.6997393584384, 2818.6997393584384, 2818.6997393584384, 2818.6997393584384],
"eval_len": [722, 722, 722, 722, 722, 722, 722, 722, 722, 722]}

 83%|████████▎ | 829999/1000000 [11:57:50<1:48:54, 26.02it/s]global step 830000, trans_decision ep_re 1469.2934059191118

{"global_step": 830000, "eval_re": [1469.2934059191118, 1469.2934059191118, 
1469.2934059191118, 1469.2934059191118, 1469.2934059191118, 1469.2934059191118, 
1469.2934059191118, 1469.2934059191118, 1469.2934059191118, 1469.2934059191118],
"eval_len": [393, 393, 393, 393, 393, 393, 393, 393, 393, 393]}

 84%|████████▍ | 839997/1000000 [12:06:30<1:41:51, 26.18it/s]global step 840000, trans_decision ep_re 1159.5842630065692

{"global_step": 840000, "eval_re": [1159.5842630065692, 1159.5842630065692, 
1159.5842630065692, 1159.5842630065692, 1159.5842630065692, 1159.5842630065692, 
1159.5842630065692, 1159.5842630065692, 1159.5842630065692, 1159.5842630065692],
"eval_len": [313, 313, 313, 313, 313, 313, 313, 313, 313, 313]}

 85%|████████▍ | 849999/1000000 [12:15:00<1:35:09, 26.27it/s]global step 850000, trans_decision ep_re 2484.989765854349

{"global_step": 850000, "eval_re": [2484.989765854349, 2484.989765854349, 
2484.989765854349, 2484.989765854349, 2484.989765854349, 2484.989765854349, 
2484.989765854349, 2484.989765854349, 2484.989765854349, 2484.989765854349], 
"eval_len": [652, 652, 652, 652, 652, 652, 652, 652, 652, 652]}

 86%|████████▌ | 859997/1000000 [12:23:50<1:29:03, 26.20it/s]global step 860000, trans_decision ep_re 845.275975645796

{"global_step": 860000, "eval_re": [845.275975645796, 845.275975645796, 
845.275975645796, 845.275975645796, 845.275975645796, 845.275975645796, 
845.275975645796, 845.275975645796, 845.275975645796, 845.275975645796], 
"eval_len": [267, 267, 267, 267, 267, 267, 267, 267, 267, 267]}

 87%|████████▋ | 869998/1000000 [12:32:20<1:21:42, 26.52it/s]global step 870000, trans_decision ep_re 828.3212329361353

{"global_step": 870000, "eval_re": [828.3212329361353, 828.3212329361353, 
828.3212329361353, 828.3212329361353, 828.3212329361353, 828.3212329361353, 
828.3212329361353, 828.3212329361353, 828.3212329361353, 828.3212329361353], 
"eval_len": [268, 268, 268, 268, 268, 268, 268, 268, 268, 268]}

 88%|████████▊ | 879999/1000000 [12:40:50<1:16:32, 26.13it/s]global step 880000, trans_decision ep_re 2319.91543976945

{"global_step": 880000, "eval_re": [2319.9154397694497, 2319.9154397694497, 
2319.9154397694497, 2319.9154397694497, 2319.9154397694497, 2319.9154397694497, 
2319.9154397694497, 2319.9154397694497, 2319.9154397694497, 2319.9154397694497],
"eval_len": [616, 616, 616, 616, 616, 616, 616, 616, 616, 616]}

 89%|████████▉ | 889998/1000000 [12:49:40<1:09:04, 26.54it/s]global step 890000, trans_decision ep_re 153.01613549742348

{"global_step": 890000, "eval_re": [153.01613549742348, 153.01613549742348, 
153.01613549742348, 153.01613549742348, 153.01613549742348, 153.01613549742348, 
153.01613549742348, 153.01613549742348, 153.01613549742348, 153.01613549742348],
"eval_len": [88, 88, 88, 88, 88, 88, 88, 88, 88, 88]}

 90%|████████▉ | 899997/1000000 [12:58:10<1:03:33, 26.22it/s]global step 900000, trans_decision ep_re 2910.6580851220133

{"global_step": 900000, "eval_re": [2910.6580851220133, 2910.6580851220133, 
2910.6580851220133, 2910.6580851220133, 2910.6580851220133, 2910.6580851220133, 
2910.6580851220133, 2910.6580851220133, 2910.6580851220133, 2910.6580851220133],
"eval_len": [830, 830, 830, 830, 830, 830, 830, 830, 830, 830]}

 91%|█████████ | 909998/1000000 [13:07:00<56:59, 26.32it/s]global step 910000, trans_decision ep_re 1892.2827819517381

{"global_step": 910000, "eval_re": [1892.2827819517383, 1892.2827819517383, 
1892.2827819517383, 1892.2827819517383, 1892.2827819517383, 1892.2827819517383, 
1892.2827819517383, 1892.2827819517383, 1892.2827819517383, 1892.2827819517383],
"eval_len": [579, 579, 579, 579, 579, 579, 579, 579, 579, 579]}

 92%|█████████▏| 919999/1000000 [13:15:50<51:10, 26.05it/s]global step 920000, trans_decision ep_re 1683.848157229962

{"global_step": 920000, "eval_re": [1683.848157229962, 1683.848157229962, 
1683.848157229962, 1683.848157229962, 1683.848157229962, 1683.848157229962, 
1683.848157229962, 1683.848157229962, 1683.848157229962, 1683.848157229962], 
"eval_len": [451, 451, 451, 451, 451, 451, 451, 451, 451, 451]}

 93%|█████████▎| 929998/1000000 [13:24:30<43:53, 26.58it/s]global step 930000, trans_decision ep_re 1018.328327099856

{"global_step": 930000, "eval_re": [1018.328327099856, 1018.328327099856, 
1018.328327099856, 1018.328327099856, 1018.328327099856, 1018.328327099856, 
1018.328327099856, 1018.328327099856, 1018.328327099856, 1018.328327099856], 
"eval_len": [276, 276, 276, 276, 276, 276, 276, 276, 276, 276]}

 94%|█████████▍| 939998/1000000 [13:33:00<37:25, 26.73it/s]global step 940000, trans_decision ep_re 1008.4450157255303

{"global_step": 940000, "eval_re": [1008.4450157255304, 1008.4450157255304, 
1008.4450157255304, 1008.4450157255304, 1008.4450157255304, 1008.4450157255304, 
1008.4450157255304, 1008.4450157255304, 1008.4450157255304, 1008.4450157255304],
"eval_len": [307, 307, 307, 307, 307, 307, 307, 307, 307, 307]}

 95%|█████████▍| 949998/1000000 [13:41:40<31:36, 26.36it/s]global step 950000, trans_decision ep_re 897.2823898351522

{"global_step": 950000, "eval_re": [897.2823898351522, 897.2823898351522, 
897.2823898351522, 897.2823898351522, 897.2823898351522, 897.2823898351522, 
897.2823898351522, 897.2823898351522, 897.2823898351522, 897.2823898351522], 
"eval_len": [277, 277, 277, 277, 277, 277, 277, 277, 277, 277]}

 96%|█████████▌| 959998/1000000 [13:50:10<25:09, 26.50it/s]global step 960000, trans_decision ep_re 1157.4904711607371

{"global_step": 960000, "eval_re": [1157.490471160737, 1157.490471160737, 
1157.490471160737, 1157.490471160737, 1157.490471160737, 1157.490471160737, 
1157.490471160737, 1157.490471160737, 1157.490471160737, 1157.490471160737], 
"eval_len": [324, 324, 324, 324, 324, 324, 324, 324, 324, 324]}

 97%|█████████▋| 969998/1000000 [13:58:50<18:57, 26.38it/s]global step 970000, trans_decision ep_re 2116.4794731807315

{"global_step": 970000, "eval_re": [2116.4794731807315, 2116.4794731807315, 
2116.4794731807315, 2116.4794731807315, 2116.4794731807315, 2116.4794731807315, 
2116.4794731807315, 2116.4794731807315, 2116.4794731807315, 2116.4794731807315],
"eval_len": [564, 564, 564, 564, 564, 564, 564, 564, 564, 564]}

 98%|█████████▊| 979999/1000000 [14:07:16<12:47, 26.07it/s]global step 980000, trans_decision ep_re 47.68270573297745

{"global_step": 980000, "eval_re": [47.68270573297745, 47.68270573297745, 
47.68270573297745, 47.68270573297745, 47.68270573297745, 47.68270573297745, 
47.68270573297745, 47.68270573297745, 47.68270573297745, 47.68270573297745], 
"eval_len": [45, 45, 45, 45, 45, 45, 45, 45, 45, 45]}

 99%|█████████▉| 989999/1000000 [14:16:00<06:22, 26.15it/s]global step 990000, trans_decision ep_re 2497.7505099552914

{"global_step": 990000, "eval_re": [2497.7505099552914, 2497.7505099552914, 
2497.7505099552914, 2497.7505099552914, 2497.7505099552914, 2497.7505099552914, 
2497.7505099552914, 2497.7505099552914, 2497.7505099552914, 2497.7505099552914],
"eval_len": [682, 682, 682, 682, 682, 682, 682, 682, 682, 682]}

100%|█████████▉| 999998/1000000 [14:24:50<00:00, 26.62it/s]global step 1000000, trans_decision ep_re 1574.7356157747658

{"global_step": 1000000, "eval_re": [1574.735615774766, 1574.735615774766, 
1574.735615774766, 1574.735615774766, 1574.735615774766, 1574.735615774766, 
1574.735615774766, 1574.735615774766, 1574.735615774766, 1574.735615774766], 
"eval_len": [399, 399, 399, 399, 399, 399, 399, 399, 399, 399]}

100%|██████████| 1000000/1000000 [14:25:04<00:00, 19.27it/s]
