
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.0
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9998/1000000 [05:40<13:11:14, 20.85it/s]global step 10000, trans_decision ep_re 747.162944179501

{"global_step": 10000, "eval_re": [747.162944179501, 747.162944179501, 
747.162944179501, 747.162944179501, 747.162944179501, 747.162944179501, 
747.162944179501, 747.162944179501, 747.162944179501, 747.162944179501], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19999/1000000 [17:10<13:10:36, 20.66it/s]global step 20000, trans_decision ep_re 352.7786356569591

{"global_step": 20000, "eval_re": [352.7786356569591, 352.7786356569591, 
352.7786356569591, 352.7786356569591, 352.7786356569591, 352.7786356569591, 
352.7786356569591, 352.7786356569591, 352.7786356569591, 352.7786356569591], 
"eval_len": [355, 355, 355, 355, 355, 355, 355, 355, 355, 355]}

  3%|▎         | 29999/1000000 [28:10<13:16:40, 20.29it/s]global step 30000, trans_decision ep_re 39.72365724314172

{"global_step": 30000, "eval_re": [39.72365724314173, 39.72365724314173, 
39.72365724314173, 39.72365724314173, 39.72365724314173, 39.72365724314173, 
39.72365724314173, 39.72365724314173, 39.72365724314173, 39.72365724314173], 
"eval_len": [53, 53, 53, 53, 53, 53, 53, 53, 53, 53]}

  4%|▍         | 39997/1000000 [39:10<13:01:14, 20.48it/s]global step 40000, trans_decision ep_re 1203.5848799019182

{"global_step": 40000, "eval_re": [1203.5848799019182, 1203.5848799019182, 
1203.5848799019182, 1203.5848799019182, 1203.5848799019182, 1203.5848799019182, 
1203.5848799019182, 1203.5848799019182, 1203.5848799019182, 1203.5848799019182],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49997/1000000 [50:40<12:50:09, 20.56it/s]global step 50000, trans_decision ep_re 984.6364747774657

{"global_step": 50000, "eval_re": [984.6364747774657, 984.6364747774657, 
984.6364747774657, 984.6364747774657, 984.6364747774657, 984.6364747774657, 
984.6364747774657, 984.6364747774657, 984.6364747774657, 984.6364747774657], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59999/1000000 [1:02:10<12:46:46, 20.43it/s]global step 60000, trans_decision ep_re 387.00586808629566

{"global_step": 60000, "eval_re": [387.00586808629566, 387.00586808629566, 
387.00586808629566, 387.00586808629566, 387.00586808629566, 387.00586808629566, 
387.00586808629566, 387.00586808629566, 387.00586808629566, 387.00586808629566],
"eval_len": [311, 311, 311, 311, 311, 311, 311, 311, 311, 311]}

  7%|▋         | 69999/1000000 [1:13:10<12:30:10, 20.66it/s]global step 70000, trans_decision ep_re 220.61621512155907

{"global_step": 70000, "eval_re": [220.6162151215591, 220.6162151215591, 
220.6162151215591, 220.6162151215591, 220.6162151215591, 220.6162151215591, 
220.6162151215591, 220.6162151215591, 220.6162151215591, 220.6162151215591], 
"eval_len": [117, 117, 117, 117, 117, 117, 117, 117, 117, 117]}

  8%|▊         | 79999/1000000 [1:24:10<12:40:44, 20.16it/s]global step 80000, trans_decision ep_re 1979.17734051651

{"global_step": 80000, "eval_re": [1979.1773405165097, 1979.1773405165097, 
1979.1773405165097, 1979.1773405165097, 1979.1773405165097, 1979.1773405165097, 
1979.1773405165097, 1979.1773405165097, 1979.1773405165097, 1979.1773405165097],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89999/1000000 [1:35:30<12:20:17, 20.49it/s]global step 90000, trans_decision ep_re 1673.981952148904

{"global_step": 90000, "eval_re": [1673.9819521489042, 1673.9819521489042, 
1673.9819521489042, 1673.9819521489042, 1673.9819521489042, 1673.9819521489042, 
1673.9819521489042, 1673.9819521489042, 1673.9819521489042, 1673.9819521489042],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99998/1000000 [1:47:00<12:00:35, 20.82it/s]global step 100000, trans_decision ep_re 1660.9904285871496

{"global_step": 100000, "eval_re": [1660.9904285871498, 1660.9904285871498, 
1660.9904285871498, 1660.9904285871498, 1660.9904285871498, 1660.9904285871498, 
1660.9904285871498, 1660.9904285871498, 1660.9904285871498, 1660.9904285871498],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109999/1000000 [1:58:20<11:55:33, 20.73it/s]global step 110000, trans_decision ep_re 1503.677158588182

{"global_step": 110000, "eval_re": [1503.6771585881822, 1503.6771585881822, 
1503.6771585881822, 1503.6771585881822, 1503.6771585881822, 1503.6771585881822, 
1503.6771585881822, 1503.6771585881822, 1503.6771585881822, 1503.6771585881822],
"eval_len": [580, 580, 580, 580, 580, 580, 580, 580, 580, 580]}

 12%|█▏        | 119999/1000000 [2:09:30<11:48:47, 20.69it/s]global step 120000, trans_decision ep_re 1150.3578368844005

{"global_step": 120000, "eval_re": [1150.3578368844005, 1150.3578368844005, 
1150.3578368844005, 1150.3578368844005, 1150.3578368844005, 1150.3578368844005, 
1150.3578368844005, 1150.3578368844005, 1150.3578368844005, 1150.3578368844005],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129999/1000000 [2:21:00<11:47:50, 20.49it/s]global step 130000, trans_decision ep_re 2490.2029745027094

{"global_step": 130000, "eval_re": [2490.2029745027094, 2490.2029745027094, 
2490.2029745027094, 2490.2029745027094, 2490.2029745027094, 2490.2029745027094, 
2490.2029745027094, 2490.2029745027094, 2490.2029745027094, 2490.2029745027094],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139998/1000000 [2:32:20<11:20:31, 21.06it/s]global step 140000, trans_decision ep_re 2661.750248150293

{"global_step": 140000, "eval_re": [2661.750248150293, 2661.750248150293, 
2661.750248150293, 2661.750248150293, 2661.750248150293, 2661.750248150293, 
2661.750248150293, 2661.750248150293, 2661.750248150293, 2661.750248150293], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149999/1000000 [2:43:50<11:27:56, 20.59it/s]global step 150000, trans_decision ep_re 2523.178963215345

{"global_step": 150000, "eval_re": [2523.178963215345, 2523.178963215345, 
2523.178963215345, 2523.178963215345, 2523.178963215345, 2523.178963215345, 
2523.178963215345, 2523.178963215345, 2523.178963215345, 2523.178963215345], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159998/1000000 [2:55:10<11:10:41, 20.87it/s]global step 160000, trans_decision ep_re 2591.447087642792

{"global_step": 160000, "eval_re": [2591.4470876427927, 2591.4470876427927, 
2591.4470876427927, 2591.4470876427927, 2591.4470876427927, 2591.4470876427927, 
2591.4470876427927, 2591.4470876427927, 2591.4470876427927, 2591.4470876427927],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169999/1000000 [3:06:40<11:06:08, 20.77it/s]global step 170000, trans_decision ep_re 2045.8197403799197

{"global_step": 170000, "eval_re": [2045.8197403799197, 2045.8197403799197, 
2045.8197403799197, 2045.8197403799197, 2045.8197403799197, 2045.8197403799197, 
2045.8197403799197, 2045.8197403799197, 2045.8197403799197, 2045.8197403799197],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179998/1000000 [3:18:10<11:01:08, 20.67it/s]global step 180000, trans_decision ep_re 1226.0631602524015

{"global_step": 180000, "eval_re": [1226.0631602524018, 1226.0631602524018, 
1226.0631602524018, 1226.0631602524018, 1226.0631602524018, 1226.0631602524018, 
1226.0631602524018, 1226.0631602524018, 1226.0631602524018, 1226.0631602524018],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189999/1000000 [3:29:40<11:07:10, 20.23it/s]global step 190000, trans_decision ep_re 2999.667512552002

{"global_step": 190000, "eval_re": [2999.667512552002, 2999.667512552002, 
2999.667512552002, 2999.667512552002, 2999.667512552002, 2999.667512552002, 
2999.667512552002, 2999.667512552002, 2999.667512552002, 2999.667512552002], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199999/1000000 [3:41:10<10:49:11, 20.54it/s]global step 200000, trans_decision ep_re 2139.157547835518

{"global_step": 200000, "eval_re": [2139.157547835518, 2139.157547835518, 
2139.157547835518, 2139.157547835518, 2139.157547835518, 2139.157547835518, 
2139.157547835518, 2139.157547835518, 2139.157547835518, 2139.157547835518], 
"eval_len": [781, 781, 781, 781, 781, 781, 781, 781, 781, 781]}

 21%|██        | 209999/1000000 [3:52:30<10:41:29, 20.53it/s]global step 210000, trans_decision ep_re 2634.013840504875

{"global_step": 210000, "eval_re": [2634.013840504875, 2634.013840504875, 
2634.013840504875, 2634.013840504875, 2634.013840504875, 2634.013840504875, 
2634.013840504875, 2634.013840504875, 2634.013840504875, 2634.013840504875], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219997/1000000 [4:03:50<10:34:11, 20.50it/s]global step 220000, trans_decision ep_re 2401.0295689448258

{"global_step": 220000, "eval_re": [2401.029568944826, 2401.029568944826, 
2401.029568944826, 2401.029568944826, 2401.029568944826, 2401.029568944826, 
2401.029568944826, 2401.029568944826, 2401.029568944826, 2401.029568944826], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229999/1000000 [4:15:10<10:27:19, 20.46it/s]global step 230000, trans_decision ep_re 388.7877720502042

{"global_step": 230000, "eval_re": [388.7877720502042, 388.7877720502042, 
388.7877720502042, 388.7877720502042, 388.7877720502042, 388.7877720502042, 
388.7877720502042, 388.7877720502042, 388.7877720502042, 388.7877720502042], 
"eval_len": [168, 168, 168, 168, 168, 168, 168, 168, 168, 168]}

 24%|██▍       | 239999/1000000 [4:26:10<10:08:04, 20.83it/s]global step 240000, trans_decision ep_re 3009.6862027322672

{"global_step": 240000, "eval_re": [3009.6862027322672, 3009.6862027322672, 
3009.6862027322672, 3009.6862027322672, 3009.6862027322672, 3009.6862027322672, 
3009.6862027322672, 3009.6862027322672, 3009.6862027322672, 3009.6862027322672],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249999/1000000 [4:37:20<10:04:41, 20.67it/s]global step 250000, trans_decision ep_re 2766.7627866362004

{"global_step": 250000, "eval_re": [2766.7627866362, 2766.7627866362, 
2766.7627866362, 2766.7627866362, 2766.7627866362, 2766.7627866362, 
2766.7627866362, 2766.7627866362, 2766.7627866362, 2766.7627866362], "eval_len":
[1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259998/1000000 [4:48:50<9:45:00, 21.08it/s]global step 260000, trans_decision ep_re 2674.1764147369

{"global_step": 260000, "eval_re": [2674.1764147369, 2674.1764147369, 
2674.1764147369, 2674.1764147369, 2674.1764147369, 2674.1764147369, 
2674.1764147369, 2674.1764147369, 2674.1764147369, 2674.1764147369], "eval_len":
[1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269999/1000000 [5:00:20<9:43:48, 20.84it/s]global step 270000, trans_decision ep_re 3214.418200537544

{"global_step": 270000, "eval_re": [3214.418200537544, 3214.418200537544, 
3214.418200537544, 3214.418200537544, 3214.418200537544, 3214.418200537544, 
3214.418200537544, 3214.418200537544, 3214.418200537544, 3214.418200537544], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279998/1000000 [5:11:40<9:34:10, 20.90it/s]global step 280000, trans_decision ep_re 2930.676779299837

{"global_step": 280000, "eval_re": [2930.676779299837, 2930.676779299837, 
2930.676779299837, 2930.676779299837, 2930.676779299837, 2930.676779299837, 
2930.676779299837, 2930.676779299837, 2930.676779299837, 2930.676779299837], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289998/1000000 [5:23:00<9:19:14, 21.16it/s]global step 290000, trans_decision ep_re 1811.4434748205208

{"global_step": 290000, "eval_re": [1811.4434748205208, 1811.4434748205208, 
1811.4434748205208, 1811.4434748205208, 1811.4434748205208, 1811.4434748205208, 
1811.4434748205208, 1811.4434748205208, 1811.4434748205208, 1811.4434748205208],
"eval_len": [765, 765, 765, 765, 765, 765, 765, 765, 765, 765]}

 30%|██▉       | 299999/1000000 [5:34:20<9:30:37, 20.45it/s]global step 300000, trans_decision ep_re 2843.0174105970373

{"global_step": 300000, "eval_re": [2843.0174105970373, 2843.0174105970373, 
2843.0174105970373, 2843.0174105970373, 2843.0174105970373, 2843.0174105970373, 
2843.0174105970373, 2843.0174105970373, 2843.0174105970373, 2843.0174105970373],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309999/1000000 [5:45:40<9:15:11, 20.71it/s]global step 310000, trans_decision ep_re 895.2951887445342

{"global_step": 310000, "eval_re": [895.2951887445342, 895.2951887445342, 
895.2951887445342, 895.2951887445342, 895.2951887445342, 895.2951887445342, 
895.2951887445342, 895.2951887445342, 895.2951887445342, 895.2951887445342], 
"eval_len": [281, 281, 281, 281, 281, 281, 281, 281, 281, 281]}

 32%|███▏      | 319999/1000000 [5:56:40<9:07:59, 20.68it/s]global step 320000, trans_decision ep_re 169.26233097354742

{"global_step": 320000, "eval_re": [169.26233097354742, 169.26233097354742, 
169.26233097354742, 169.26233097354742, 169.26233097354742, 169.26233097354742, 
169.26233097354742, 169.26233097354742, 169.26233097354742, 169.26233097354742],
"eval_len": [97, 97, 97, 97, 97, 97, 97, 97, 97, 97]}

 33%|███▎      | 329999/1000000 [6:07:30<8:57:30, 20.78it/s]global step 330000, trans_decision ep_re 3052.74433039705

{"global_step": 330000, "eval_re": [3052.74433039705, 3052.74433039705, 
3052.74433039705, 3052.74433039705, 3052.74433039705, 3052.74433039705, 
3052.74433039705, 3052.74433039705, 3052.74433039705, 3052.74433039705], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339998/1000000 [6:18:50<8:47:08, 20.87it/s]global step 340000, trans_decision ep_re 835.7674809027889

{"global_step": 340000, "eval_re": [835.767480902789, 835.767480902789, 
835.767480902789, 835.767480902789, 835.767480902789, 835.767480902789, 
835.767480902789, 835.767480902789, 835.767480902789, 835.767480902789], 
"eval_len": [309, 309, 309, 309, 309, 309, 309, 309, 309, 309]}

 35%|███▍      | 349999/1000000 [6:29:50<8:45:57, 20.60it/s]global step 350000, trans_decision ep_re 2922.670106385255

{"global_step": 350000, "eval_re": [2922.6701063852547, 2922.6701063852547, 
2922.6701063852547, 2922.6701063852547, 2922.6701063852547, 2922.6701063852547, 
2922.6701063852547, 2922.6701063852547, 2922.6701063852547, 2922.6701063852547],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359998/1000000 [6:41:20<8:38:11, 20.58it/s]global step 360000, trans_decision ep_re 2912.3248082084183

{"global_step": 360000, "eval_re": [2912.3248082084187, 2912.3248082084187, 
2912.3248082084187, 2912.3248082084187, 2912.3248082084187, 2912.3248082084187, 
2912.3248082084187, 2912.3248082084187, 2912.3248082084187, 2912.3248082084187],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369999/1000000 [6:52:50<8:24:44, 20.80it/s]global step 370000, trans_decision ep_re 2740.919304127984

{"global_step": 370000, "eval_re": [2740.919304127984, 2740.919304127984, 
2740.919304127984, 2740.919304127984, 2740.919304127984, 2740.919304127984, 
2740.919304127984, 2740.919304127984, 2740.919304127984, 2740.919304127984], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379999/1000000 [7:04:10<8:19:17, 20.70it/s]global step 380000, trans_decision ep_re 2831.189047819337

{"global_step": 380000, "eval_re": [2831.1890478193363, 2831.1890478193363, 
2831.1890478193363, 2831.1890478193363, 2831.1890478193363, 2831.1890478193363, 
2831.1890478193363, 2831.1890478193363, 2831.1890478193363, 2831.1890478193363],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389999/1000000 [7:15:30<8:14:12, 20.57it/s]global step 390000, trans_decision ep_re 2977.087396690029

{"global_step": 390000, "eval_re": [2977.087396690029, 2977.087396690029, 
2977.087396690029, 2977.087396690029, 2977.087396690029, 2977.087396690029, 
2977.087396690029, 2977.087396690029, 2977.087396690029, 2977.087396690029], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [7:27:00<8:15:40, 20.17it/s]global step 400000, trans_decision ep_re 55.128676684973904

{"global_step": 400000, "eval_re": [55.1286766849739, 55.1286766849739, 
55.1286766849739, 55.1286766849739, 55.1286766849739, 55.1286766849739, 
55.1286766849739, 55.1286766849739, 55.1286766849739, 55.1286766849739], 
"eval_len": [87, 87, 87, 87, 87, 87, 87, 87, 87, 87]}

 41%|████      | 409999/1000000 [7:37:50<7:56:49, 20.62it/s]global step 410000, trans_decision ep_re 3388.8065946816687

{"global_step": 410000, "eval_re": [3388.8065946816682, 3388.8065946816682, 
3388.8065946816682, 3388.8065946816682, 3388.8065946816682, 3388.8065946816682, 
3388.8065946816682, 3388.8065946816682, 3388.8065946816682, 3388.8065946816682],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419998/1000000 [7:49:10<7:36:20, 21.18it/s]global step 420000, trans_decision ep_re 3574.8694760090184

{"global_step": 420000, "eval_re": [3574.8694760090184, 3574.8694760090184, 
3574.8694760090184, 3574.8694760090184, 3574.8694760090184, 3574.8694760090184, 
3574.8694760090184, 3574.8694760090184, 3574.8694760090184, 3574.8694760090184],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429999/1000000 [8:00:30<7:35:10, 20.87it/s]global step 430000, trans_decision ep_re 2978.7697142075094

{"global_step": 430000, "eval_re": [2978.76971420751, 2978.76971420751, 
2978.76971420751, 2978.76971420751, 2978.76971420751, 2978.76971420751, 
2978.76971420751, 2978.76971420751, 2978.76971420751, 2978.76971420751], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439999/1000000 [8:12:00<7:41:37, 20.22it/s]global step 440000, trans_decision ep_re 3175.2293176338244

{"global_step": 440000, "eval_re": [3175.2293176338244, 3175.2293176338244, 
3175.2293176338244, 3175.2293176338244, 3175.2293176338244, 3175.2293176338244, 
3175.2293176338244, 3175.2293176338244, 3175.2293176338244, 3175.2293176338244],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449999/1000000 [8:23:30<7:27:50, 20.47it/s]global step 450000, trans_decision ep_re 3015.791906229567

{"global_step": 450000, "eval_re": [3015.7919062295673, 3015.7919062295673, 
3015.7919062295673, 3015.7919062295673, 3015.7919062295673, 3015.7919062295673, 
3015.7919062295673, 3015.7919062295673, 3015.7919062295673, 3015.7919062295673],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459999/1000000 [8:35:00<7:10:18, 20.92it/s]global step 460000, trans_decision ep_re 2585.1063938281704

{"global_step": 460000, "eval_re": [2585.1063938281704, 2585.1063938281704, 
2585.1063938281704, 2585.1063938281704, 2585.1063938281704, 2585.1063938281704, 
2585.1063938281704, 2585.1063938281704, 2585.1063938281704, 2585.1063938281704],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469999/1000000 [8:46:20<6:59:21, 21.06it/s]global step 470000, trans_decision ep_re 2799.7245377644317

{"global_step": 470000, "eval_re": [2799.7245377644317, 2799.7245377644317, 
2799.7245377644317, 2799.7245377644317, 2799.7245377644317, 2799.7245377644317, 
2799.7245377644317, 2799.7245377644317, 2799.7245377644317, 2799.7245377644317],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479999/1000000 [8:57:40<6:55:52, 20.84it/s]global step 480000, trans_decision ep_re 1841.9061601501103

{"global_step": 480000, "eval_re": [1841.9061601501105, 1841.9061601501105, 
1841.9061601501105, 1841.9061601501105, 1841.9061601501105, 1841.9061601501105, 
1841.9061601501105, 1841.9061601501105, 1841.9061601501105, 1841.9061601501105],
"eval_len": [628, 628, 628, 628, 628, 628, 628, 628, 628, 628]}

 49%|████▉     | 489998/1000000 [9:08:50<6:47:38, 20.85it/s]global step 490000, trans_decision ep_re 529.8074443917202

{"global_step": 490000, "eval_re": [529.8074443917202, 529.8074443917202, 
529.8074443917202, 529.8074443917202, 529.8074443917202, 529.8074443917202, 
529.8074443917202, 529.8074443917202, 529.8074443917202, 529.8074443917202], 
"eval_len": [312, 312, 312, 312, 312, 312, 312, 312, 312, 312]}

 50%|████▉     | 499997/1000000 [9:19:50<6:42:42, 20.69it/s]global step 500000, trans_decision ep_re 2716.4487508388715

{"global_step": 500000, "eval_re": [2716.448750838872, 2716.448750838872, 
2716.448750838872, 2716.448750838872, 2716.448750838872, 2716.448750838872, 
2716.448750838872, 2716.448750838872, 2716.448750838872, 2716.448750838872], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509999/1000000 [9:31:20<6:35:44, 20.64it/s]global step 510000, trans_decision ep_re 3043.209686860003

{"global_step": 510000, "eval_re": [3043.209686860003, 3043.209686860003, 
3043.209686860003, 3043.209686860003, 3043.209686860003, 3043.209686860003, 
3043.209686860003, 3043.209686860003, 3043.209686860003, 3043.209686860003], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519998/1000000 [9:42:50<6:23:16, 20.87it/s]global step 520000, trans_decision ep_re 523.7244620189642

{"global_step": 520000, "eval_re": [523.7244620189643, 523.7244620189643, 
523.7244620189643, 523.7244620189643, 523.7244620189643, 523.7244620189643, 
523.7244620189643, 523.7244620189643, 523.7244620189643, 523.7244620189643], 
"eval_len": [204, 204, 204, 204, 204, 204, 204, 204, 204, 204]}

 53%|█████▎    | 529999/1000000 [9:53:50<6:21:04, 20.56it/s]global step 530000, trans_decision ep_re 3117.7694154206506

{"global_step": 530000, "eval_re": [3117.7694154206506, 3117.7694154206506, 
3117.7694154206506, 3117.7694154206506, 3117.7694154206506, 3117.7694154206506, 
3117.7694154206506, 3117.7694154206506, 3117.7694154206506, 3117.7694154206506],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539998/1000000 [10:05:20<6:06:28, 20.92it/s]global step 540000, trans_decision ep_re 1041.6819726707677

{"global_step": 540000, "eval_re": [1041.681972670768, 1041.681972670768, 
1041.681972670768, 1041.681972670768, 1041.681972670768, 1041.681972670768, 
1041.681972670768, 1041.681972670768, 1041.681972670768, 1041.681972670768], 
"eval_len": [395, 395, 395, 395, 395, 395, 395, 395, 395, 395]}

 55%|█████▍    | 549999/1000000 [10:16:20<6:01:44, 20.73it/s]global step 550000, trans_decision ep_re 2171.250912415671

{"global_step": 550000, "eval_re": [2171.250912415671, 2171.250912415671, 
2171.250912415671, 2171.250912415671, 2171.250912415671, 2171.250912415671, 
2171.250912415671, 2171.250912415671, 2171.250912415671, 2171.250912415671], 
"eval_len": [852, 852, 852, 852, 852, 852, 852, 852, 852, 852]}

 56%|█████▌    | 559997/1000000 [10:27:40<5:52:23, 20.81it/s]global step 560000, trans_decision ep_re 1270.0430291303323

{"global_step": 560000, "eval_re": [1270.0430291303321, 1270.0430291303321, 
1270.0430291303321, 1270.0430291303321, 1270.0430291303321, 1270.0430291303321, 
1270.0430291303321, 1270.0430291303321, 1270.0430291303321, 1270.0430291303321],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569999/1000000 [10:39:20<5:51:31, 20.39it/s]global step 570000, trans_decision ep_re -459.6105370924303

{"global_step": 570000, "eval_re": [-459.61053709243026, -459.61053709243026, 
-459.61053709243026, -459.61053709243026, -459.61053709243026, 
-459.61053709243026, -459.61053709243026, -459.61053709243026, 
-459.61053709243026, -459.61053709243026], "eval_len": [1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579998/1000000 [10:50:50<5:42:15, 20.45it/s]global step 580000, trans_decision ep_re 2381.9265048909956

{"global_step": 580000, "eval_re": [2381.926504890996, 2381.926504890996, 
2381.926504890996, 2381.926504890996, 2381.926504890996, 2381.926504890996, 
2381.926504890996, 2381.926504890996, 2381.926504890996, 2381.926504890996], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589999/1000000 [11:02:20<5:34:36, 20.42it/s]global step 590000, trans_decision ep_re 1898.9119277886348

{"global_step": 590000, "eval_re": [1898.9119277886348, 1898.9119277886348, 
1898.9119277886348, 1898.9119277886348, 1898.9119277886348, 1898.9119277886348, 
1898.9119277886348, 1898.9119277886348, 1898.9119277886348, 1898.9119277886348],
"eval_len": [699, 699, 699, 699, 699, 699, 699, 699, 699, 699]}

 60%|█████▉    | 599997/1000000 [11:13:40<5:20:54, 20.77it/s]global step 600000, trans_decision ep_re 1962.0419759403944

{"global_step": 600000, "eval_re": [1962.041975940394, 1962.041975940394, 
1962.041975940394, 1962.041975940394, 1962.041975940394, 1962.041975940394, 
1962.041975940394, 1962.041975940394, 1962.041975940394, 1962.041975940394], 
"eval_len": [621, 621, 621, 621, 621, 621, 621, 621, 621, 621]}

 61%|██████    | 609999/1000000 [11:24:50<5:14:04, 20.70it/s]global step 610000, trans_decision ep_re 270.86257136706485

{"global_step": 610000, "eval_re": [270.86257136706485, 270.86257136706485, 
270.86257136706485, 270.86257136706485, 270.86257136706485, 270.86257136706485, 
270.86257136706485, 270.86257136706485, 270.86257136706485, 270.86257136706485],
"eval_len": [266, 266, 266, 266, 266, 266, 266, 266, 266, 266]}

 62%|██████▏   | 619998/1000000 [11:35:50<5:04:48, 20.78it/s]global step 620000, trans_decision ep_re 421.11097784176854

{"global_step": 620000, "eval_re": [421.11097784176854, 421.11097784176854, 
421.11097784176854, 421.11097784176854, 421.11097784176854, 421.11097784176854, 
421.11097784176854, 421.11097784176854, 421.11097784176854, 421.11097784176854],
"eval_len": [179, 179, 179, 179, 179, 179, 179, 179, 179, 179]}

 63%|██████▎   | 629997/1000000 [11:46:50<4:58:44, 20.64it/s]global step 630000, trans_decision ep_re -2078.8495868098257

{"global_step": 630000, "eval_re": [-2078.8495868098257, -2078.8495868098257, 
-2078.8495868098257, -2078.8495868098257, -2078.8495868098257, 
-2078.8495868098257, -2078.8495868098257, -2078.8495868098257, 
-2078.8495868098257, -2078.8495868098257], "eval_len": [1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639998/1000000 [11:58:20<4:44:11, 21.11it/s]global step 640000, trans_decision ep_re 562.5117180415465

{"global_step": 640000, "eval_re": [562.5117180415465, 562.5117180415465, 
562.5117180415465, 562.5117180415465, 562.5117180415465, 562.5117180415465, 
562.5117180415465, 562.5117180415465, 562.5117180415465, 562.5117180415465], 
"eval_len": [305, 305, 305, 305, 305, 305, 305, 305, 305, 305]}

 65%|██████▍   | 649999/1000000 [12:09:20<4:40:52, 20.77it/s]global step 650000, trans_decision ep_re 737.3019054550621

{"global_step": 650000, "eval_re": [737.301905455062, 737.301905455062, 
737.301905455062, 737.301905455062, 737.301905455062, 737.301905455062, 
737.301905455062, 737.301905455062, 737.301905455062, 737.301905455062], 
"eval_len": [283, 283, 283, 283, 283, 283, 283, 283, 283, 283]}

 66%|██████▌   | 659999/1000000 [12:20:20<4:34:59, 20.61it/s]global step 660000, trans_decision ep_re 3302.302321324469

{"global_step": 660000, "eval_re": [3302.302321324469, 3302.302321324469, 
3302.302321324469, 3302.302321324469, 3302.302321324469, 3302.302321324469, 
3302.302321324469, 3302.302321324469, 3302.302321324469, 3302.302321324469], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669997/1000000 [12:32:00<4:28:46, 20.46it/s]global step 670000, trans_decision ep_re 1650.012086292904

{"global_step": 670000, "eval_re": [1650.012086292904, 1650.012086292904, 
1650.012086292904, 1650.012086292904, 1650.012086292904, 1650.012086292904, 
1650.012086292904, 1650.012086292904, 1650.012086292904, 1650.012086292904], 
"eval_len": [762, 762, 762, 762, 762, 762, 762, 762, 762, 762]}

 68%|██████▊   | 679999/1000000 [12:43:20<4:20:07, 20.50it/s]global step 680000, trans_decision ep_re 2063.574945293949

{"global_step": 680000, "eval_re": [2063.574945293949, 2063.574945293949, 
2063.574945293949, 2063.574945293949, 2063.574945293949, 2063.574945293949, 
2063.574945293949, 2063.574945293949, 2063.574945293949, 2063.574945293949], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689999/1000000 [12:54:40<4:07:46, 20.85it/s]global step 690000, trans_decision ep_re 1498.0184467575004

{"global_step": 690000, "eval_re": [1498.0184467575004, 1498.0184467575004, 
1498.0184467575004, 1498.0184467575004, 1498.0184467575004, 1498.0184467575004, 
1498.0184467575004, 1498.0184467575004, 1498.0184467575004, 1498.0184467575004],
"eval_len": [498, 498, 498, 498, 498, 498, 498, 498, 498, 498]}

 70%|██████▉   | 699998/1000000 [13:05:50<3:58:57, 20.93it/s]global step 700000, trans_decision ep_re 2425.1544866880204

{"global_step": 700000, "eval_re": [2425.15448668802, 2425.15448668802, 
2425.15448668802, 2425.15448668802, 2425.15448668802, 2425.15448668802, 
2425.15448668802, 2425.15448668802, 2425.15448668802, 2425.15448668802], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709999/1000000 [13:17:10<3:53:01, 20.74it/s]global step 710000, trans_decision ep_re 2967.5787767252295

{"global_step": 710000, "eval_re": [2967.5787767252295, 2967.5787767252295, 
2967.5787767252295, 2967.5787767252295, 2967.5787767252295, 2967.5787767252295, 
2967.5787767252295, 2967.5787767252295, 2967.5787767252295, 2967.5787767252295],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719998/1000000 [13:28:40<3:43:59, 20.83it/s]global step 720000, trans_decision ep_re 2761.3776324025484

{"global_step": 720000, "eval_re": [2761.377632402548, 2761.377632402548, 
2761.377632402548, 2761.377632402548, 2761.377632402548, 2761.377632402548, 
2761.377632402548, 2761.377632402548, 2761.377632402548, 2761.377632402548], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729999/1000000 [13:40:00<3:37:48, 20.66it/s]global step 730000, trans_decision ep_re 3145.3260375708333

{"global_step": 730000, "eval_re": [3145.326037570833, 3145.326037570833, 
3145.326037570833, 3145.326037570833, 3145.326037570833, 3145.326037570833, 
3145.326037570833, 3145.326037570833, 3145.326037570833, 3145.326037570833], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739999/1000000 [13:51:30<3:30:33, 20.58it/s]global step 740000, trans_decision ep_re 1353.614320100614

{"global_step": 740000, "eval_re": [1353.6143201006137, 1353.6143201006137, 
1353.6143201006137, 1353.6143201006137, 1353.6143201006137, 1353.6143201006137, 
1353.6143201006137, 1353.6143201006137, 1353.6143201006137, 1353.6143201006137],
"eval_len": [523, 523, 523, 523, 523, 523, 523, 523, 523, 523]}

 75%|███████▍  | 749999/1000000 [14:02:40<3:20:00, 20.83it/s]global step 750000, trans_decision ep_re 2968.140994891456

{"global_step": 750000, "eval_re": [2968.140994891456, 2968.140994891456, 
2968.140994891456, 2968.140994891456, 2968.140994891456, 2968.140994891456, 
2968.140994891456, 2968.140994891456, 2968.140994891456, 2968.140994891456], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759999/1000000 [14:14:01<3:13:12, 20.70it/s]global step 760000, trans_decision ep_re 3437.6976834985953

{"global_step": 760000, "eval_re": [3437.6976834985953, 3437.6976834985953, 
3437.6976834985953, 3437.6976834985953, 3437.6976834985953, 3437.6976834985953, 
3437.6976834985953, 3437.6976834985953, 3437.6976834985953, 3437.6976834985953],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769998/1000000 [14:25:21<3:05:06, 20.71it/s]global step 770000, trans_decision ep_re 286.4785279412713

{"global_step": 770000, "eval_re": [286.4785279412713, 286.4785279412713, 
286.4785279412713, 286.4785279412713, 286.4785279412713, 286.4785279412713, 
286.4785279412713, 286.4785279412713, 286.4785279412713, 286.4785279412713], 
"eval_len": [149, 149, 149, 149, 149, 149, 149, 149, 149, 149]}

 78%|███████▊  | 779998/1000000 [14:36:11<2:54:01, 21.07it/s]global step 780000, trans_decision ep_re 1693.8084555830487

{"global_step": 780000, "eval_re": [1693.8084555830487, 1693.8084555830487, 
1693.8084555830487, 1693.8084555830487, 1693.8084555830487, 1693.8084555830487, 
1693.8084555830487, 1693.8084555830487, 1693.8084555830487, 1693.8084555830487],
"eval_len": [531, 531, 531, 531, 531, 531, 531, 531, 531, 531]}

 79%|███████▉  | 789999/1000000 [14:47:31<2:46:34, 21.01it/s]global step 790000, trans_decision ep_re 1675.838524870875

{"global_step": 790000, "eval_re": [1675.8385248708748, 1675.8385248708748, 
1675.8385248708748, 1675.8385248708748, 1675.8385248708748, 1675.8385248708748, 
1675.8385248708748, 1675.8385248708748, 1675.8385248708748, 1675.8385248708748],
"eval_len": [585, 585, 585, 585, 585, 585, 585, 585, 585, 585]}

 80%|███████▉  | 799999/1000000 [14:58:31<2:38:19, 21.05it/s]global step 800000, trans_decision ep_re 2975.7626904109816

{"global_step": 800000, "eval_re": [2975.7626904109816, 2975.7626904109816, 
2975.7626904109816, 2975.7626904109816, 2975.7626904109816, 2975.7626904109816, 
2975.7626904109816, 2975.7626904109816, 2975.7626904109816, 2975.7626904109816],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809999/1000000 [15:10:01<2:32:49, 20.72it/s]global step 810000, trans_decision ep_re 2625.764569054122

{"global_step": 810000, "eval_re": [2625.764569054122, 2625.764569054122, 
2625.764569054122, 2625.764569054122, 2625.764569054122, 2625.764569054122, 
2625.764569054122, 2625.764569054122, 2625.764569054122, 2625.764569054122], 
"eval_len": [841, 841, 841, 841, 841, 841, 841, 841, 841, 841]}

 82%|████████▏ | 819999/1000000 [15:21:11<2:28:45, 20.17it/s]global step 820000, trans_decision ep_re 2551.0914523913443

{"global_step": 820000, "eval_re": [2551.091452391344, 2551.091452391344, 
2551.091452391344, 2551.091452391344, 2551.091452391344, 2551.091452391344, 
2551.091452391344, 2551.091452391344, 2551.091452391344, 2551.091452391344], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829999/1000000 [15:32:41<2:16:59, 20.68it/s]global step 830000, trans_decision ep_re 806.1666162613112

{"global_step": 830000, "eval_re": [806.1666162613113, 806.1666162613113, 
806.1666162613113, 806.1666162613113, 806.1666162613113, 806.1666162613113, 
806.1666162613113, 806.1666162613113, 806.1666162613113, 806.1666162613113], 
"eval_len": [310, 310, 310, 310, 310, 310, 310, 310, 310, 310]}

 84%|████████▍ | 839998/1000000 [15:43:41<2:06:29, 21.08it/s]global step 840000, trans_decision ep_re 86.66581638178926

{"global_step": 840000, "eval_re": [86.66581638178926, 86.66581638178926, 
86.66581638178926, 86.66581638178926, 86.66581638178926, 86.66581638178926, 
86.66581638178926, 86.66581638178926, 86.66581638178926, 86.66581638178926], 
"eval_len": [66, 66, 66, 66, 66, 66, 66, 66, 66, 66]}

 85%|████████▍ | 849999/1000000 [15:54:31<2:02:21, 20.43it/s]global step 850000, trans_decision ep_re 111.8487821809355

{"global_step": 850000, "eval_re": [111.8487821809355, 111.8487821809355, 
111.8487821809355, 111.8487821809355, 111.8487821809355, 111.8487821809355, 
111.8487821809355, 111.8487821809355, 111.8487821809355, 111.8487821809355], 
"eval_len": [70, 70, 70, 70, 70, 70, 70, 70, 70, 70]}

 86%|████████▌ | 859998/1000000 [16:05:21<1:51:34, 20.91it/s]global step 860000, trans_decision ep_re 3297.84118129495

{"global_step": 860000, "eval_re": [3297.8411812949494, 3297.8411812949494, 
3297.8411812949494, 3297.8411812949494, 3297.8411812949494, 3297.8411812949494, 
3297.8411812949494, 3297.8411812949494, 3297.8411812949494, 3297.8411812949494],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869999/1000000 [16:16:51<1:46:51, 20.28it/s]global step 870000, trans_decision ep_re 3197.578126545221

{"global_step": 870000, "eval_re": [3197.578126545221, 3197.578126545221, 
3197.578126545221, 3197.578126545221, 3197.578126545221, 3197.578126545221, 
3197.578126545221, 3197.578126545221, 3197.578126545221, 3197.578126545221], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879999/1000000 [16:28:11<1:37:28, 20.52it/s]global step 880000, trans_decision ep_re 1515.482827804613

{"global_step": 880000, "eval_re": [1515.482827804613, 1515.482827804613, 
1515.482827804613, 1515.482827804613, 1515.482827804613, 1515.482827804613, 
1515.482827804613, 1515.482827804613, 1515.482827804613, 1515.482827804613], 
"eval_len": [718, 718, 718, 718, 718, 718, 718, 718, 718, 718]}

 89%|████████▉ | 889999/1000000 [16:39:31<1:28:35, 20.70it/s]global step 890000, trans_decision ep_re 1180.2192423583542

{"global_step": 890000, "eval_re": [1180.2192423583542, 1180.2192423583542, 
1180.2192423583542, 1180.2192423583542, 1180.2192423583542, 1180.2192423583542, 
1180.2192423583542, 1180.2192423583542, 1180.2192423583542, 1180.2192423583542],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [16:51:01<1:20:42, 20.65it/s]global step 900000, trans_decision ep_re 3459.7850237826233

{"global_step": 900000, "eval_re": [3459.7850237826237, 3459.7850237826237, 
3459.7850237826237, 3459.7850237826237, 3459.7850237826237, 3459.7850237826237, 
3459.7850237826237, 3459.7850237826237, 3459.7850237826237, 3459.7850237826237],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909997/1000000 [17:02:21<1:12:27, 20.70it/s]global step 910000, trans_decision ep_re 3445.2348624346414

{"global_step": 910000, "eval_re": [3445.2348624346414, 3445.2348624346414, 
3445.2348624346414, 3445.2348624346414, 3445.2348624346414, 3445.2348624346414, 
3445.2348624346414, 3445.2348624346414, 3445.2348624346414, 3445.2348624346414],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919999/1000000 [17:13:51<1:04:12, 20.77it/s]global step 920000, trans_decision ep_re 3367.469132032068

{"global_step": 920000, "eval_re": [3367.469132032068, 3367.469132032068, 
3367.469132032068, 3367.469132032068, 3367.469132032068, 3367.469132032068, 
3367.469132032068, 3367.469132032068, 3367.469132032068, 3367.469132032068], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929999/1000000 [17:25:11<56:28, 20.66it/s]global step 930000, trans_decision ep_re 3085.225538298513

{"global_step": 930000, "eval_re": [3085.225538298513, 3085.225538298513, 
3085.225538298513, 3085.225538298513, 3085.225538298513, 3085.225538298513, 
3085.225538298513, 3085.225538298513, 3085.225538298513, 3085.225538298513], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939999/1000000 [17:36:41<48:14, 20.73it/s]global step 940000, trans_decision ep_re 3079.0909239736293

{"global_step": 940000, "eval_re": [3079.0909239736293, 3079.0909239736293, 
3079.0909239736293, 3079.0909239736293, 3079.0909239736293, 3079.0909239736293, 
3079.0909239736293, 3079.0909239736293, 3079.0909239736293, 3079.0909239736293],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949999/1000000 [17:48:01<40:23, 20.63it/s]global step 950000, trans_decision ep_re 2230.1048673272458

{"global_step": 950000, "eval_re": [2230.1048673272458, 2230.1048673272458, 
2230.1048673272458, 2230.1048673272458, 2230.1048673272458, 2230.1048673272458, 
2230.1048673272458, 2230.1048673272458, 2230.1048673272458, 2230.1048673272458],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959999/1000000 [17:59:31<32:27, 20.54it/s]global step 960000, trans_decision ep_re 1893.499305865807

{"global_step": 960000, "eval_re": [1893.499305865807, 1893.499305865807, 
1893.499305865807, 1893.499305865807, 1893.499305865807, 1893.499305865807, 
1893.499305865807, 1893.499305865807, 1893.499305865807, 1893.499305865807], 
"eval_len": [621, 621, 621, 621, 621, 621, 621, 621, 621, 621]}

 97%|█████████▋| 969999/1000000 [18:10:41<24:10, 20.68it/s]global step 970000, trans_decision ep_re 1357.4542553434499

{"global_step": 970000, "eval_re": [1357.4542553434499, 1357.4542553434499, 
1357.4542553434499, 1357.4542553434499, 1357.4542553434499, 1357.4542553434499, 
1357.4542553434499, 1357.4542553434499, 1357.4542553434499, 1357.4542553434499],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979999/1000000 [18:22:01<16:06, 20.69it/s]global step 980000, trans_decision ep_re 151.91458939328885

{"global_step": 980000, "eval_re": [151.91458939328882, 151.91458939328882, 
151.91458939328882, 151.91458939328882, 151.91458939328882, 151.91458939328882, 
151.91458939328882, 151.91458939328882, 151.91458939328882, 151.91458939328882],
"eval_len": [77, 77, 77, 77, 77, 77, 77, 77, 77, 77]}

 99%|█████████▉| 989998/1000000 [18:33:01<08:01, 20.79it/s]global step 990000, trans_decision ep_re 3121.357334942738

{"global_step": 990000, "eval_re": [3121.3573349427375, 3121.3573349427375, 
3121.3573349427375, 3121.3573349427375, 3121.3573349427375, 3121.3573349427375, 
3121.3573349427375, 3121.3573349427375, 3121.3573349427375, 3121.3573349427375],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999999/1000000 [18:44:31<00:00, 20.88it/s]global step 1000000, trans_decision ep_re 3338.4116948820265

{"global_step": 1000000, "eval_re": [3338.4116948820265, 3338.4116948820265, 
3338.4116948820265, 3338.4116948820265, 3338.4116948820265, 3338.4116948820265, 
3338.4116948820265, 3338.4116948820265, 3338.4116948820265, 3338.4116948820265],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [18:45:08<00:00, 14.81it/s]
