
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.0
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9998/1000000 [05:30<12:52:25, 21.36it/s]global step 10000, trans_decision ep_re 177.84978682355106

{"global_step": 10000, "eval_re": [177.84978682355106, 177.84978682355106, 
177.84978682355106, 177.84978682355106, 177.84978682355106, 177.84978682355106, 
177.84978682355106, 177.84978682355106, 177.84978682355106, 177.84978682355106],
"eval_len": [113, 113, 113, 113, 113, 113, 113, 113, 113, 113]}

  2%|▏         | 19999/1000000 [15:50<12:56:37, 21.03it/s]global step 20000, trans_decision ep_re 103.86227932822207

{"global_step": 20000, "eval_re": [103.86227932822207, 103.86227932822207, 
103.86227932822207, 103.86227932822207, 103.86227932822207, 103.86227932822207, 
103.86227932822207, 103.86227932822207, 103.86227932822207, 103.86227932822207],
"eval_len": [81, 81, 81, 81, 81, 81, 81, 81, 81, 81]}

  3%|▎         | 29997/1000000 [26:40<12:40:28, 21.26it/s]global step 30000, trans_decision ep_re 119.58577480071354

{"global_step": 30000, "eval_re": [119.58577480071352, 119.58577480071352, 
119.58577480071352, 119.58577480071352, 119.58577480071352, 119.58577480071352, 
119.58577480071352, 119.58577480071352, 119.58577480071352, 119.58577480071352],
"eval_len": [83, 83, 83, 83, 83, 83, 83, 83, 83, 83]}

  4%|▍         | 39999/1000000 [37:00<12:36:11, 21.16it/s]global step 40000, trans_decision ep_re 121.44342306775727

{"global_step": 40000, "eval_re": [121.44342306775728, 121.44342306775728, 
121.44342306775728, 121.44342306775728, 121.44342306775728, 121.44342306775728, 
121.44342306775728, 121.44342306775728, 121.44342306775728, 121.44342306775728],
"eval_len": [85, 85, 85, 85, 85, 85, 85, 85, 85, 85]}

  5%|▍         | 49998/1000000 [47:50<12:12:49, 21.61it/s]global step 50000, trans_decision ep_re 149.75496342129972

{"global_step": 50000, "eval_re": [149.75496342129975, 149.75496342129975, 
149.75496342129975, 149.75496342129975, 149.75496342129975, 149.75496342129975, 
149.75496342129975, 149.75496342129975, 149.75496342129975, 149.75496342129975],
"eval_len": [96, 96, 96, 96, 96, 96, 96, 96, 96, 96]}

  6%|▌         | 59998/1000000 [58:30<12:22:16, 21.11it/s]global step 60000, trans_decision ep_re 59.24779390157797

{"global_step": 60000, "eval_re": [59.24779390157797, 59.24779390157797, 
59.24779390157797, 59.24779390157797, 59.24779390157797, 59.24779390157797, 
59.24779390157797, 59.24779390157797, 59.24779390157797, 59.24779390157797], 
"eval_len": [52, 52, 52, 52, 52, 52, 52, 52, 52, 52]}

  7%|▋         | 69999/1000000 [1:09:10<12:14:25, 21.10it/s]global step 70000, trans_decision ep_re 137.83921401885317

{"global_step": 70000, "eval_re": [137.83921401885317, 137.83921401885317, 
137.83921401885317, 137.83921401885317, 137.83921401885317, 137.83921401885317, 
137.83921401885317, 137.83921401885317, 137.83921401885317, 137.83921401885317],
"eval_len": [108, 108, 108, 108, 108, 108, 108, 108, 108, 108]}

  8%|▊         | 79999/1000000 [1:19:40<12:11:22, 20.97it/s]global step 80000, trans_decision ep_re 133.57046524116691

{"global_step": 80000, "eval_re": [133.57046524116691, 133.57046524116691, 
133.57046524116691, 133.57046524116691, 133.57046524116691, 133.57046524116691, 
133.57046524116691, 133.57046524116691, 133.57046524116691, 133.57046524116691],
"eval_len": [88, 88, 88, 88, 88, 88, 88, 88, 88, 88]}

  9%|▉         | 89999/1000000 [1:30:20<11:58:46, 21.10it/s]global step 90000, trans_decision ep_re 478.0926566487866

{"global_step": 90000, "eval_re": [478.09265664878654, 478.09265664878654, 
478.09265664878654, 478.09265664878654, 478.09265664878654, 478.09265664878654, 
478.09265664878654, 478.09265664878654, 478.09265664878654, 478.09265664878654],
"eval_len": [205, 205, 205, 205, 205, 205, 205, 205, 205, 205]}

 10%|▉         | 99999/1000000 [1:41:00<11:47:32, 21.20it/s]global step 100000, trans_decision ep_re 336.9450386672974

{"global_step": 100000, "eval_re": [336.9450386672974, 336.9450386672974, 
336.9450386672974, 336.9450386672974, 336.9450386672974, 336.9450386672974, 
336.9450386672974, 336.9450386672974, 336.9450386672974, 336.9450386672974], 
"eval_len": [149, 149, 149, 149, 149, 149, 149, 149, 149, 149]}

 11%|█         | 109999/1000000 [1:51:30<11:41:25, 21.15it/s]global step 110000, trans_decision ep_re 433.56218855334157

{"global_step": 110000, "eval_re": [433.56218855334157, 433.56218855334157, 
433.56218855334157, 433.56218855334157, 433.56218855334157, 433.56218855334157, 
433.56218855334157, 433.56218855334157, 433.56218855334157, 433.56218855334157],
"eval_len": [194, 194, 194, 194, 194, 194, 194, 194, 194, 194]}

 12%|█▏        | 119998/1000000 [2:02:10<11:24:38, 21.42it/s]global step 120000, trans_decision ep_re 834.8445730190317

{"global_step": 120000, "eval_re": [834.8445730190316, 834.8445730190316, 
834.8445730190316, 834.8445730190316, 834.8445730190316, 834.8445730190316, 
834.8445730190316, 834.8445730190316, 834.8445730190316, 834.8445730190316], 
"eval_len": [294, 294, 294, 294, 294, 294, 294, 294, 294, 294]}

 13%|█▎        | 129998/1000000 [2:12:50<11:11:33, 21.59it/s]global step 130000, trans_decision ep_re 1009.3429741910244

{"global_step": 130000, "eval_re": [1009.3429741910245, 1009.3429741910245, 
1009.3429741910245, 1009.3429741910245, 1009.3429741910245, 1009.3429741910245, 
1009.3429741910245, 1009.3429741910245, 1009.3429741910245, 1009.3429741910245],
"eval_len": [373, 373, 373, 373, 373, 373, 373, 373, 373, 373]}

 14%|█▍        | 139998/1000000 [2:23:30<10:56:20, 21.84it/s]global step 140000, trans_decision ep_re 100.04966181687283

{"global_step": 140000, "eval_re": [100.04966181687283, 100.04966181687283, 
100.04966181687283, 100.04966181687283, 100.04966181687283, 100.04966181687283, 
100.04966181687283, 100.04966181687283, 100.04966181687283, 100.04966181687283],
"eval_len": [74, 74, 74, 74, 74, 74, 74, 74, 74, 74]}

 15%|█▍        | 149999/1000000 [2:33:50<10:58:56, 21.50it/s]global step 150000, trans_decision ep_re 454.0597425893895

{"global_step": 150000, "eval_re": [454.0597425893895, 454.0597425893895, 
454.0597425893895, 454.0597425893895, 454.0597425893895, 454.0597425893895, 
454.0597425893895, 454.0597425893895, 454.0597425893895, 454.0597425893895], 
"eval_len": [198, 198, 198, 198, 198, 198, 198, 198, 198, 198]}

 16%|█▌        | 159999/1000000 [2:44:20<10:50:25, 21.52it/s]global step 160000, trans_decision ep_re 444.3036702306798

{"global_step": 160000, "eval_re": [444.30367023067987, 444.30367023067987, 
444.30367023067987, 444.30367023067987, 444.30367023067987, 444.30367023067987, 
444.30367023067987, 444.30367023067987, 444.30367023067987, 444.30367023067987],
"eval_len": [203, 203, 203, 203, 203, 203, 203, 203, 203, 203]}

 17%|█▋        | 169999/1000000 [2:54:50<10:44:13, 21.47it/s]global step 170000, trans_decision ep_re 442.3054870891177

{"global_step": 170000, "eval_re": [442.3054870891178, 442.3054870891178, 
442.3054870891178, 442.3054870891178, 442.3054870891178, 442.3054870891178, 
442.3054870891178, 442.3054870891178, 442.3054870891178, 442.3054870891178], 
"eval_len": [191, 191, 191, 191, 191, 191, 191, 191, 191, 191]}

 18%|█▊        | 179999/1000000 [3:05:20<10:35:16, 21.51it/s]global step 180000, trans_decision ep_re 801.7403791730343

{"global_step": 180000, "eval_re": [801.7403791730344, 801.7403791730344, 
801.7403791730344, 801.7403791730344, 801.7403791730344, 801.7403791730344, 
801.7403791730344, 801.7403791730344, 801.7403791730344, 801.7403791730344], 
"eval_len": [290, 290, 290, 290, 290, 290, 290, 290, 290, 290]}

 19%|█▉        | 189999/1000000 [3:15:41<10:26:37, 21.54it/s]global step 190000, trans_decision ep_re 80.92724365237811

{"global_step": 190000, "eval_re": [80.9272436523781, 80.9272436523781, 
80.9272436523781, 80.9272436523781, 80.9272436523781, 80.9272436523781, 
80.9272436523781, 80.9272436523781, 80.9272436523781, 80.9272436523781], 
"eval_len": [70, 70, 70, 70, 70, 70, 70, 70, 70, 70]}

 20%|█▉        | 199999/1000000 [3:26:20<10:21:07, 21.47it/s]global step 200000, trans_decision ep_re 1179.1553739699816

{"global_step": 200000, "eval_re": [1179.1553739699818, 1179.1553739699818, 
1179.1553739699818, 1179.1553739699818, 1179.1553739699818, 1179.1553739699818, 
1179.1553739699818, 1179.1553739699818, 1179.1553739699818, 1179.1553739699818],
"eval_len": [385, 385, 385, 385, 385, 385, 385, 385, 385, 385]}

 21%|██        | 209999/1000000 [3:37:00<10:13:23, 21.47it/s]global step 210000, trans_decision ep_re 1981.7823618113173

{"global_step": 210000, "eval_re": [1981.7823618113173, 1981.7823618113173, 
1981.7823618113173, 1981.7823618113173, 1981.7823618113173, 1981.7823618113173, 
1981.7823618113173, 1981.7823618113173, 1981.7823618113173, 1981.7823618113173],
"eval_len": [704, 704, 704, 704, 704, 704, 704, 704, 704, 704]}

 22%|██▏       | 219999/1000000 [3:47:40<10:00:36, 21.64it/s]global step 220000, trans_decision ep_re 365.04040557632516

{"global_step": 220000, "eval_re": [365.04040557632516, 365.04040557632516, 
365.04040557632516, 365.04040557632516, 365.04040557632516, 365.04040557632516, 
365.04040557632516, 365.04040557632516, 365.04040557632516, 365.04040557632516],
"eval_len": [163, 163, 163, 163, 163, 163, 163, 163, 163, 163]}

 23%|██▎       | 229999/1000000 [3:58:10<10:00:22, 21.38it/s]global step 230000, trans_decision ep_re 509.7408909941852

{"global_step": 230000, "eval_re": [509.7408909941851, 509.7408909941851, 
509.7408909941851, 509.7408909941851, 509.7408909941851, 509.7408909941851, 
509.7408909941851, 509.7408909941851, 509.7408909941851, 509.7408909941851], 
"eval_len": [206, 206, 206, 206, 206, 206, 206, 206, 206, 206]}

 24%|██▍       | 239999/1000000 [4:08:40<9:48:25, 21.53it/s]global step 240000, trans_decision ep_re 206.40559658293154

{"global_step": 240000, "eval_re": [206.40559658293154, 206.40559658293154, 
206.40559658293154, 206.40559658293154, 206.40559658293154, 206.40559658293154, 
206.40559658293154, 206.40559658293154, 206.40559658293154, 206.40559658293154],
"eval_len": [112, 112, 112, 112, 112, 112, 112, 112, 112, 112]}

 25%|██▍       | 249997/1000000 [4:19:10<9:42:53, 21.44it/s]global step 250000, trans_decision ep_re 561.171441316392

{"global_step": 250000, "eval_re": [561.1714413163921, 561.1714413163921, 
561.1714413163921, 561.1714413163921, 561.1714413163921, 561.1714413163921, 
561.1714413163921, 561.1714413163921, 561.1714413163921, 561.1714413163921], 
"eval_len": [216, 216, 216, 216, 216, 216, 216, 216, 216, 216]}

 26%|██▌       | 259998/1000000 [4:29:40<9:25:21, 21.82it/s]global step 260000, trans_decision ep_re 759.9047054589665

{"global_step": 260000, "eval_re": [759.9047054589665, 759.9047054589665, 
759.9047054589665, 759.9047054589665, 759.9047054589665, 759.9047054589665, 
759.9047054589665, 759.9047054589665, 759.9047054589665, 759.9047054589665], 
"eval_len": [288, 288, 288, 288, 288, 288, 288, 288, 288, 288]}

 27%|██▋       | 269999/1000000 [4:40:10<9:25:11, 21.53it/s]global step 270000, trans_decision ep_re 308.0512100354246

{"global_step": 270000, "eval_re": [308.0512100354247, 308.0512100354247, 
308.0512100354247, 308.0512100354247, 308.0512100354247, 308.0512100354247, 
308.0512100354247, 308.0512100354247, 308.0512100354247, 308.0512100354247], 
"eval_len": [143, 143, 143, 143, 143, 143, 143, 143, 143, 143]}

 28%|██▊       | 279999/1000000 [4:50:40<9:18:38, 21.48it/s]global step 280000, trans_decision ep_re 591.0386013081677

{"global_step": 280000, "eval_re": [591.0386013081677, 591.0386013081677, 
591.0386013081677, 591.0386013081677, 591.0386013081677, 591.0386013081677, 
591.0386013081677, 591.0386013081677, 591.0386013081677, 591.0386013081677], 
"eval_len": [227, 227, 227, 227, 227, 227, 227, 227, 227, 227]}

 29%|██▉       | 289999/1000000 [5:01:10<9:10:49, 21.48it/s]global step 290000, trans_decision ep_re 365.4415443952143

{"global_step": 290000, "eval_re": [365.4415443952143, 365.4415443952143, 
365.4415443952143, 365.4415443952143, 365.4415443952143, 365.4415443952143, 
365.4415443952143, 365.4415443952143, 365.4415443952143, 365.4415443952143], 
"eval_len": [158, 158, 158, 158, 158, 158, 158, 158, 158, 158]}

 30%|██▉       | 299997/1000000 [5:11:40<9:01:02, 21.56it/s]global step 300000, trans_decision ep_re 376.50908609388443

{"global_step": 300000, "eval_re": [376.50908609388443, 376.50908609388443, 
376.50908609388443, 376.50908609388443, 376.50908609388443, 376.50908609388443, 
376.50908609388443, 376.50908609388443, 376.50908609388443, 376.50908609388443],
"eval_len": [165, 165, 165, 165, 165, 165, 165, 165, 165, 165]}

 31%|███       | 309998/1000000 [5:22:10<8:48:47, 21.75it/s]global step 310000, trans_decision ep_re 207.29507060679856

{"global_step": 310000, "eval_re": [207.2950706067986, 207.2950706067986, 
207.2950706067986, 207.2950706067986, 207.2950706067986, 207.2950706067986, 
207.2950706067986, 207.2950706067986, 207.2950706067986, 207.2950706067986], 
"eval_len": [111, 111, 111, 111, 111, 111, 111, 111, 111, 111]}

 32%|███▏      | 319999/1000000 [5:32:30<8:43:46, 21.64it/s]global step 320000, trans_decision ep_re 196.87251111597388

{"global_step": 320000, "eval_re": [196.8725111159739, 196.8725111159739, 
196.8725111159739, 196.8725111159739, 196.8725111159739, 196.8725111159739, 
196.8725111159739, 196.8725111159739, 196.8725111159739, 196.8725111159739], 
"eval_len": [108, 108, 108, 108, 108, 108, 108, 108, 108, 108]}

 33%|███▎      | 329999/1000000 [5:43:00<8:38:37, 21.53it/s]global step 330000, trans_decision ep_re 2021.72842304063

{"global_step": 330000, "eval_re": [2021.72842304063, 2021.72842304063, 
2021.72842304063, 2021.72842304063, 2021.72842304063, 2021.72842304063, 
2021.72842304063, 2021.72842304063, 2021.72842304063, 2021.72842304063], 
"eval_len": [648, 648, 648, 648, 648, 648, 648, 648, 648, 648]}

 34%|███▍      | 339999/1000000 [5:53:50<8:28:54, 21.62it/s]global step 340000, trans_decision ep_re 907.4317852433269

{"global_step": 340000, "eval_re": [907.4317852433269, 907.4317852433269, 
907.4317852433269, 907.4317852433269, 907.4317852433269, 907.4317852433269, 
907.4317852433269, 907.4317852433269, 907.4317852433269, 907.4317852433269], 
"eval_len": [295, 295, 295, 295, 295, 295, 295, 295, 295, 295]}

 35%|███▍      | 349998/1000000 [6:04:20<8:15:16, 21.87it/s]global step 350000, trans_decision ep_re 1202.2527371626825

{"global_step": 350000, "eval_re": [1202.2527371626827, 1202.2527371626827, 
1202.2527371626827, 1202.2527371626827, 1202.2527371626827, 1202.2527371626827, 
1202.2527371626827, 1202.2527371626827, 1202.2527371626827, 1202.2527371626827],
"eval_len": [365, 365, 365, 365, 365, 365, 365, 365, 365, 365]}

 36%|███▌      | 359999/1000000 [6:14:50<8:14:07, 21.59it/s]global step 360000, trans_decision ep_re 469.9305404716805

{"global_step": 360000, "eval_re": [469.9305404716805, 469.9305404716805, 
469.9305404716805, 469.9305404716805, 469.9305404716805, 469.9305404716805, 
469.9305404716805, 469.9305404716805, 469.9305404716805, 469.9305404716805], 
"eval_len": [185, 185, 185, 185, 185, 185, 185, 185, 185, 185]}

 37%|███▋      | 369999/1000000 [6:25:20<8:06:40, 21.58it/s]global step 370000, trans_decision ep_re 1094.9840609171447

{"global_step": 370000, "eval_re": [1094.9840609171447, 1094.9840609171447, 
1094.9840609171447, 1094.9840609171447, 1094.9840609171447, 1094.9840609171447, 
1094.9840609171447, 1094.9840609171447, 1094.9840609171447, 1094.9840609171447],
"eval_len": [371, 371, 371, 371, 371, 371, 371, 371, 371, 371]}

 38%|███▊      | 379999/1000000 [6:36:00<7:59:48, 21.54it/s]global step 380000, trans_decision ep_re 162.34928438312272

{"global_step": 380000, "eval_re": [162.34928438312272, 162.34928438312272, 
162.34928438312272, 162.34928438312272, 162.34928438312272, 162.34928438312272, 
162.34928438312272, 162.34928438312272, 162.34928438312272, 162.34928438312272],
"eval_len": [97, 97, 97, 97, 97, 97, 97, 97, 97, 97]}

 39%|███▉      | 389999/1000000 [6:46:20<7:51:14, 21.57it/s]global step 390000, trans_decision ep_re 375.94681734205585

{"global_step": 390000, "eval_re": [375.9468173420558, 375.9468173420558, 
375.9468173420558, 375.9468173420558, 375.9468173420558, 375.9468173420558, 
375.9468173420558, 375.9468173420558, 375.9468173420558, 375.9468173420558], 
"eval_len": [159, 159, 159, 159, 159, 159, 159, 159, 159, 159]}

 40%|███▉      | 399997/1000000 [6:56:50<7:42:51, 21.61it/s]global step 400000, trans_decision ep_re 492.0561503405722

{"global_step": 400000, "eval_re": [492.0561503405722, 492.0561503405722, 
492.0561503405722, 492.0561503405722, 492.0561503405722, 492.0561503405722, 
492.0561503405722, 492.0561503405722, 492.0561503405722, 492.0561503405722], 
"eval_len": [192, 192, 192, 192, 192, 192, 192, 192, 192, 192]}

 41%|████      | 409997/1000000 [7:07:20<7:37:51, 21.48it/s]global step 410000, trans_decision ep_re 1450.5899431131688

{"global_step": 410000, "eval_re": [1450.5899431131686, 1450.5899431131686, 
1450.5899431131686, 1450.5899431131686, 1450.5899431131686, 1450.5899431131686, 
1450.5899431131686, 1450.5899431131686, 1450.5899431131686, 1450.5899431131686],
"eval_len": [421, 421, 421, 421, 421, 421, 421, 421, 421, 421]}

 42%|████▏     | 419998/1000000 [7:18:00<7:20:36, 21.94it/s]global step 420000, trans_decision ep_re 1058.6660710177182

{"global_step": 420000, "eval_re": [1058.6660710177182, 1058.6660710177182, 
1058.6660710177182, 1058.6660710177182, 1058.6660710177182, 1058.6660710177182, 
1058.6660710177182, 1058.6660710177182, 1058.6660710177182, 1058.6660710177182],
"eval_len": [320, 320, 320, 320, 320, 320, 320, 320, 320, 320]}

 43%|████▎     | 429997/1000000 [7:28:30<7:20:05, 21.59it/s]global step 430000, trans_decision ep_re 199.36758180476062

{"global_step": 430000, "eval_re": [199.3675818047606, 199.3675818047606, 
199.3675818047606, 199.3675818047606, 199.3675818047606, 199.3675818047606, 
199.3675818047606, 199.3675818047606, 199.3675818047606, 199.3675818047606], 
"eval_len": [108, 108, 108, 108, 108, 108, 108, 108, 108, 108]}

 44%|████▍     | 439998/1000000 [7:39:00<7:11:01, 21.65it/s]global step 440000, trans_decision ep_re 513.602867716588

{"global_step": 440000, "eval_re": [513.6028677165881, 513.6028677165881, 
513.6028677165881, 513.6028677165881, 513.6028677165881, 513.6028677165881, 
513.6028677165881, 513.6028677165881, 513.6028677165881, 513.6028677165881], 
"eval_len": [191, 191, 191, 191, 191, 191, 191, 191, 191, 191]}

 45%|████▍     | 449998/1000000 [7:49:30<7:04:07, 21.61it/s]global step 450000, trans_decision ep_re 843.6082470369607

{"global_step": 450000, "eval_re": [843.6082470369607, 843.6082470369607, 
843.6082470369607, 843.6082470369607, 843.6082470369607, 843.6082470369607, 
843.6082470369607, 843.6082470369607, 843.6082470369607, 843.6082470369607], 
"eval_len": [284, 284, 284, 284, 284, 284, 284, 284, 284, 284]}

 46%|████▌     | 459998/1000000 [8:00:00<6:53:32, 21.76it/s]global step 460000, trans_decision ep_re 167.46673207355622

{"global_step": 460000, "eval_re": [167.46673207355622, 167.46673207355622, 
167.46673207355622, 167.46673207355622, 167.46673207355622, 167.46673207355622, 
167.46673207355622, 167.46673207355622, 167.46673207355622, 167.46673207355622],
"eval_len": [104, 104, 104, 104, 104, 104, 104, 104, 104, 104]}

 47%|████▋     | 469999/1000000 [8:10:30<6:50:31, 21.52it/s]global step 470000, trans_decision ep_re 206.06465992734653

{"global_step": 470000, "eval_re": [206.06465992734653, 206.06465992734653, 
206.06465992734653, 206.06465992734653, 206.06465992734653, 206.06465992734653, 
206.06465992734653, 206.06465992734653, 206.06465992734653, 206.06465992734653],
"eval_len": [110, 110, 110, 110, 110, 110, 110, 110, 110, 110]}

 48%|████▊     | 479999/1000000 [8:20:50<6:41:25, 21.59it/s]global step 480000, trans_decision ep_re 3239.257308073852

{"global_step": 480000, "eval_re": [3239.257308073852, 3239.257308073852, 
3239.257308073852, 3239.257308073852, 3239.257308073852, 3239.257308073852, 
3239.257308073852, 3239.257308073852, 3239.257308073852, 3239.257308073852], 
"eval_len": [950, 950, 950, 950, 950, 950, 950, 950, 950, 950]}

 49%|████▉     | 489998/1000000 [8:31:50<6:32:51, 21.64it/s]global step 490000, trans_decision ep_re 280.1846394868527

{"global_step": 490000, "eval_re": [280.1846394868527, 280.1846394868527, 
280.1846394868527, 280.1846394868527, 280.1846394868527, 280.1846394868527, 
280.1846394868527, 280.1846394868527, 280.1846394868527, 280.1846394868527], 
"eval_len": [131, 131, 131, 131, 131, 131, 131, 131, 131, 131]}

 50%|████▉     | 499999/1000000 [8:42:10<6:24:17, 21.68it/s]global step 500000, trans_decision ep_re 1500.7113415209562

{"global_step": 500000, "eval_re": [1500.711341520956, 1500.711341520956, 
1500.711341520956, 1500.711341520956, 1500.711341520956, 1500.711341520956, 
1500.711341520956, 1500.711341520956, 1500.711341520956, 1500.711341520956], 
"eval_len": [437, 437, 437, 437, 437, 437, 437, 437, 437, 437]}

 51%|█████     | 509998/1000000 [8:52:50<6:15:45, 21.73it/s]global step 510000, trans_decision ep_re 1007.4453071854463

{"global_step": 510000, "eval_re": [1007.4453071854464, 1007.4453071854464, 
1007.4453071854464, 1007.4453071854464, 1007.4453071854464, 1007.4453071854464, 
1007.4453071854464, 1007.4453071854464, 1007.4453071854464, 1007.4453071854464],
"eval_len": [339, 339, 339, 339, 339, 339, 339, 339, 339, 339]}

 52%|█████▏    | 519999/1000000 [9:03:20<6:08:01, 21.74it/s]global step 520000, trans_decision ep_re 1979.074876736065

{"global_step": 520000, "eval_re": [1979.0748767360653, 1979.0748767360653, 
1979.0748767360653, 1979.0748767360653, 1979.0748767360653, 1979.0748767360653, 
1979.0748767360653, 1979.0748767360653, 1979.0748767360653, 1979.0748767360653],
"eval_len": [553, 553, 553, 553, 553, 553, 553, 553, 553, 553]}

 53%|█████▎    | 529998/1000000 [9:14:00<5:59:36, 21.78it/s]global step 530000, trans_decision ep_re 2786.6499740315094

{"global_step": 530000, "eval_re": [2786.64997403151, 2786.64997403151, 
2786.64997403151, 2786.64997403151, 2786.64997403151, 2786.64997403151, 
2786.64997403151, 2786.64997403151, 2786.64997403151, 2786.64997403151], 
"eval_len": [783, 783, 783, 783, 783, 783, 783, 783, 783, 783]}

 54%|█████▍    | 539999/1000000 [9:24:50<5:55:52, 21.54it/s]global step 540000, trans_decision ep_re 83.83203113599983

{"global_step": 540000, "eval_re": [83.83203113599983, 83.83203113599983, 
83.83203113599983, 83.83203113599983, 83.83203113599983, 83.83203113599983, 
83.83203113599983, 83.83203113599983, 83.83203113599983, 83.83203113599983], 
"eval_len": [80, 80, 80, 80, 80, 80, 80, 80, 80, 80]}

 55%|█████▍    | 549999/1000000 [9:35:10<5:45:08, 21.73it/s]global step 550000, trans_decision ep_re 3377.6628951939647

{"global_step": 550000, "eval_re": [3377.6628951939642, 3377.6628951939642, 
3377.6628951939642, 3377.6628951939642, 3377.6628951939642, 3377.6628951939642, 
3377.6628951939642, 3377.6628951939642, 3377.6628951939642, 3377.6628951939642],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559999/1000000 [9:46:10<5:43:06, 21.37it/s]global step 560000, trans_decision ep_re 767.8318100579427

{"global_step": 560000, "eval_re": [767.8318100579427, 767.8318100579427, 
767.8318100579427, 767.8318100579427, 767.8318100579427, 767.8318100579427, 
767.8318100579427, 767.8318100579427, 767.8318100579427, 767.8318100579427], 
"eval_len": [263, 263, 263, 263, 263, 263, 263, 263, 263, 263]}

 57%|█████▋    | 569998/1000000 [9:56:40<5:31:36, 21.61it/s]global step 570000, trans_decision ep_re 47.957727916110926

{"global_step": 570000, "eval_re": [47.957727916110926, 47.957727916110926, 
47.957727916110926, 47.957727916110926, 47.957727916110926, 47.957727916110926, 
47.957727916110926, 47.957727916110926, 47.957727916110926, 47.957727916110926],
"eval_len": [49, 49, 49, 49, 49, 49, 49, 49, 49, 49]}

 58%|█████▊    | 579999/1000000 [10:07:00<5:26:49, 21.42it/s]global step 580000, trans_decision ep_re 245.99902146189524

{"global_step": 580000, "eval_re": [245.99902146189527, 245.99902146189527, 
245.99902146189527, 245.99902146189527, 245.99902146189527, 245.99902146189527, 
245.99902146189527, 245.99902146189527, 245.99902146189527, 245.99902146189527],
"eval_len": [120, 120, 120, 120, 120, 120, 120, 120, 120, 120]}

 59%|█████▉    | 589998/1000000 [10:17:30<5:14:28, 21.73it/s]global step 590000, trans_decision ep_re 322.7296053689047

{"global_step": 590000, "eval_re": [322.7296053689047, 322.7296053689047, 
322.7296053689047, 322.7296053689047, 322.7296053689047, 322.7296053689047, 
322.7296053689047, 322.7296053689047, 322.7296053689047, 322.7296053689047], 
"eval_len": [144, 144, 144, 144, 144, 144, 144, 144, 144, 144]}

 60%|█████▉    | 599999/1000000 [10:28:00<5:08:11, 21.63it/s]global step 600000, trans_decision ep_re 244.53027082066552

{"global_step": 600000, "eval_re": [244.5302708206655, 244.5302708206655, 
244.5302708206655, 244.5302708206655, 244.5302708206655, 244.5302708206655, 
244.5302708206655, 244.5302708206655, 244.5302708206655, 244.5302708206655], 
"eval_len": [122, 122, 122, 122, 122, 122, 122, 122, 122, 122]}

 61%|██████    | 609999/1000000 [10:38:20<5:00:40, 21.62it/s]global step 610000, trans_decision ep_re 139.13426595894836

{"global_step": 610000, "eval_re": [139.13426595894836, 139.13426595894836, 
139.13426595894836, 139.13426595894836, 139.13426595894836, 139.13426595894836, 
139.13426595894836, 139.13426595894836, 139.13426595894836, 139.13426595894836],
"eval_len": [93, 93, 93, 93, 93, 93, 93, 93, 93, 93]}

 62%|██████▏   | 619998/1000000 [10:48:50<4:51:49, 21.70it/s]global step 620000, trans_decision ep_re 170.70310229446574

{"global_step": 620000, "eval_re": [170.70310229446574, 170.70310229446574, 
170.70310229446574, 170.70310229446574, 170.70310229446574, 170.70310229446574, 
170.70310229446574, 170.70310229446574, 170.70310229446574, 170.70310229446574],
"eval_len": [101, 101, 101, 101, 101, 101, 101, 101, 101, 101]}

 63%|██████▎   | 629999/1000000 [10:59:20<4:46:21, 21.53it/s]global step 630000, trans_decision ep_re 320.67067615946

{"global_step": 630000, "eval_re": [320.67067615946, 320.67067615946, 
320.67067615946, 320.67067615946, 320.67067615946, 320.67067615946, 
320.67067615946, 320.67067615946, 320.67067615946, 320.67067615946], "eval_len":
[141, 141, 141, 141, 141, 141, 141, 141, 141, 141]}

 64%|██████▍   | 639999/1000000 [11:09:40<4:37:44, 21.60it/s]global step 640000, trans_decision ep_re 3403.4632566748596

{"global_step": 640000, "eval_re": [3403.4632566748596, 3403.4632566748596, 
3403.4632566748596, 3403.4632566748596, 3403.4632566748596, 3403.4632566748596, 
3403.4632566748596, 3403.4632566748596, 3403.4632566748596, 3403.4632566748596],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649998/1000000 [11:20:40<4:26:11, 21.91it/s]global step 650000, trans_decision ep_re 960.7299302310705

{"global_step": 650000, "eval_re": [960.7299302310704, 960.7299302310704, 
960.7299302310704, 960.7299302310704, 960.7299302310704, 960.7299302310704, 
960.7299302310704, 960.7299302310704, 960.7299302310704, 960.7299302310704], 
"eval_len": [317, 317, 317, 317, 317, 317, 317, 317, 317, 317]}

 66%|██████▌   | 659999/1000000 [11:31:10<4:22:46, 21.57it/s]global step 660000, trans_decision ep_re 686.275097410595

{"global_step": 660000, "eval_re": [686.275097410595, 686.275097410595, 
686.275097410595, 686.275097410595, 686.275097410595, 686.275097410595, 
686.275097410595, 686.275097410595, 686.275097410595, 686.275097410595], 
"eval_len": [240, 240, 240, 240, 240, 240, 240, 240, 240, 240]}

 67%|██████▋   | 669999/1000000 [11:41:40<4:16:37, 21.43it/s]global step 670000, trans_decision ep_re 355.6170945157592

{"global_step": 670000, "eval_re": [355.6170945157592, 355.6170945157592, 
355.6170945157592, 355.6170945157592, 355.6170945157592, 355.6170945157592, 
355.6170945157592, 355.6170945157592, 355.6170945157592, 355.6170945157592], 
"eval_len": [149, 149, 149, 149, 149, 149, 149, 149, 149, 149]}

 68%|██████▊   | 679997/1000000 [11:52:10<4:06:21, 21.65it/s]global step 680000, trans_decision ep_re 1821.9500570163195

{"global_step": 680000, "eval_re": [1821.9500570163198, 1821.9500570163198, 
1821.9500570163198, 1821.9500570163198, 1821.9500570163198, 1821.9500570163198, 
1821.9500570163198, 1821.9500570163198, 1821.9500570163198, 1821.9500570163198],
"eval_len": [542, 542, 542, 542, 542, 542, 542, 542, 542, 542]}

 69%|██████▉   | 689997/1000000 [12:02:50<4:00:18, 21.50it/s]global step 690000, trans_decision ep_re 164.3763238099725

{"global_step": 690000, "eval_re": [164.3763238099725, 164.3763238099725, 
164.3763238099725, 164.3763238099725, 164.3763238099725, 164.3763238099725, 
164.3763238099725, 164.3763238099725, 164.3763238099725, 164.3763238099725], 
"eval_len": [97, 97, 97, 97, 97, 97, 97, 97, 97, 97]}

 70%|██████▉   | 699999/1000000 [12:13:20<3:51:22, 21.61it/s]global step 700000, trans_decision ep_re 1576.6721164646121

{"global_step": 700000, "eval_re": [1576.6721164646124, 1576.6721164646124, 
1576.6721164646124, 1576.6721164646124, 1576.6721164646124, 1576.6721164646124, 
1576.6721164646124, 1576.6721164646124, 1576.6721164646124, 1576.6721164646124],
"eval_len": [444, 444, 444, 444, 444, 444, 444, 444, 444, 444]}

 71%|███████   | 709997/1000000 [12:23:50<3:42:06, 21.76it/s]global step 710000, trans_decision ep_re 560.5221564113516

{"global_step": 710000, "eval_re": [560.5221564113516, 560.5221564113516, 
560.5221564113516, 560.5221564113516, 560.5221564113516, 560.5221564113516, 
560.5221564113516, 560.5221564113516, 560.5221564113516, 560.5221564113516], 
"eval_len": [203, 203, 203, 203, 203, 203, 203, 203, 203, 203]}

 72%|███████▏  | 719999/1000000 [12:34:10<3:34:09, 21.79it/s]global step 720000, trans_decision ep_re 565.0032348731595

{"global_step": 720000, "eval_re": [565.0032348731596, 565.0032348731596, 
565.0032348731596, 565.0032348731596, 565.0032348731596, 565.0032348731596, 
565.0032348731596, 565.0032348731596, 565.0032348731596, 565.0032348731596], 
"eval_len": [204, 204, 204, 204, 204, 204, 204, 204, 204, 204]}

 73%|███████▎  | 729999/1000000 [12:44:40<3:26:00, 21.84it/s]global step 730000, trans_decision ep_re 149.88739834278363

{"global_step": 730000, "eval_re": [149.88739834278363, 149.88739834278363, 
149.88739834278363, 149.88739834278363, 149.88739834278363, 149.88739834278363, 
149.88739834278363, 149.88739834278363, 149.88739834278363, 149.88739834278363],
"eval_len": [92, 92, 92, 92, 92, 92, 92, 92, 92, 92]}

 74%|███████▍  | 739999/1000000 [12:55:00<3:19:18, 21.74it/s]global step 740000, trans_decision ep_re 366.68740056715876

{"global_step": 740000, "eval_re": [366.6874005671587, 366.6874005671587, 
366.6874005671587, 366.6874005671587, 366.6874005671587, 366.6874005671587, 
366.6874005671587, 366.6874005671587, 366.6874005671587, 366.6874005671587], 
"eval_len": [155, 155, 155, 155, 155, 155, 155, 155, 155, 155]}

 75%|███████▍  | 749998/1000000 [13:05:20<3:09:43, 21.96it/s]global step 750000, trans_decision ep_re 770.6176682097455

{"global_step": 750000, "eval_re": [770.6176682097454, 770.6176682097454, 
770.6176682097454, 770.6176682097454, 770.6176682097454, 770.6176682097454, 
770.6176682097454, 770.6176682097454, 770.6176682097454, 770.6176682097454], 
"eval_len": [267, 267, 267, 267, 267, 267, 267, 267, 267, 267]}

 76%|███████▌  | 759999/1000000 [13:15:40<3:04:13, 21.71it/s]global step 760000, trans_decision ep_re 687.284509529198

{"global_step": 760000, "eval_re": [687.284509529198, 687.284509529198, 
687.284509529198, 687.284509529198, 687.284509529198, 687.284509529198, 
687.284509529198, 687.284509529198, 687.284509529198, 687.284509529198], 
"eval_len": [237, 237, 237, 237, 237, 237, 237, 237, 237, 237]}

 77%|███████▋  | 769998/1000000 [13:26:10<2:54:43, 21.94it/s]global step 770000, trans_decision ep_re 324.48961215549656

{"global_step": 770000, "eval_re": [324.48961215549656, 324.48961215549656, 
324.48961215549656, 324.48961215549656, 324.48961215549656, 324.48961215549656, 
324.48961215549656, 324.48961215549656, 324.48961215549656, 324.48961215549656],
"eval_len": [148, 148, 148, 148, 148, 148, 148, 148, 148, 148]}

 78%|███████▊  | 779997/1000000 [13:36:30<2:49:30, 21.63it/s]global step 780000, trans_decision ep_re 456.8562403576423

{"global_step": 780000, "eval_re": [456.85624035764226, 456.85624035764226, 
456.85624035764226, 456.85624035764226, 456.85624035764226, 456.85624035764226, 
456.85624035764226, 456.85624035764226, 456.85624035764226, 456.85624035764226],
"eval_len": [179, 179, 179, 179, 179, 179, 179, 179, 179, 179]}

 79%|███████▉  | 789999/1000000 [13:46:50<2:40:25, 21.82it/s]global step 790000, trans_decision ep_re 1008.5383911950219

{"global_step": 790000, "eval_re": [1008.538391195022, 1008.538391195022, 
1008.538391195022, 1008.538391195022, 1008.538391195022, 1008.538391195022, 
1008.538391195022, 1008.538391195022, 1008.538391195022, 1008.538391195022], 
"eval_len": [305, 305, 305, 305, 305, 305, 305, 305, 305, 305]}

 80%|███████▉  | 799997/1000000 [13:57:20<2:33:11, 21.76it/s]global step 800000, trans_decision ep_re 296.06454054266413

{"global_step": 800000, "eval_re": [296.0645405426642, 296.0645405426642, 
296.0645405426642, 296.0645405426642, 296.0645405426642, 296.0645405426642, 
296.0645405426642, 296.0645405426642, 296.0645405426642, 296.0645405426642], 
"eval_len": [136, 136, 136, 136, 136, 136, 136, 136, 136, 136]}

 81%|████████  | 809999/1000000 [14:07:40<2:25:12, 21.81it/s]global step 810000, trans_decision ep_re 43.22202075384554

{"global_step": 810000, "eval_re": [43.22202075384555, 43.22202075384555, 
43.22202075384555, 43.22202075384555, 43.22202075384555, 43.22202075384555, 
43.22202075384555, 43.22202075384555, 43.22202075384555, 43.22202075384555], 
"eval_len": [45, 45, 45, 45, 45, 45, 45, 45, 45, 45]}

 82%|████████▏ | 819998/1000000 [14:18:00<2:16:43, 21.94it/s]global step 820000, trans_decision ep_re 1723.6309849486875

{"global_step": 820000, "eval_re": [1723.6309849486875, 1723.6309849486875, 
1723.6309849486875, 1723.6309849486875, 1723.6309849486875, 1723.6309849486875, 
1723.6309849486875, 1723.6309849486875, 1723.6309849486875, 1723.6309849486875],
"eval_len": [482, 482, 482, 482, 482, 482, 482, 482, 482, 482]}

 83%|████████▎ | 829999/1000000 [14:28:30<2:09:25, 21.89it/s]global step 830000, trans_decision ep_re 140.94847963656642

{"global_step": 830000, "eval_re": [140.94847963656642, 140.94847963656642, 
140.94847963656642, 140.94847963656642, 140.94847963656642, 140.94847963656642, 
140.94847963656642, 140.94847963656642, 140.94847963656642, 140.94847963656642],
"eval_len": [91, 91, 91, 91, 91, 91, 91, 91, 91, 91]}

 84%|████████▍ | 839999/1000000 [14:38:50<2:02:16, 21.81it/s]global step 840000, trans_decision ep_re 940.4327016495808

{"global_step": 840000, "eval_re": [940.4327016495808, 940.4327016495808, 
940.4327016495808, 940.4327016495808, 940.4327016495808, 940.4327016495808, 
940.4327016495808, 940.4327016495808, 940.4327016495808, 940.4327016495808], 
"eval_len": [304, 304, 304, 304, 304, 304, 304, 304, 304, 304]}

 85%|████████▍ | 849998/1000000 [14:49:20<1:53:51, 21.96it/s]global step 850000, trans_decision ep_re 381.08905712877805

{"global_step": 850000, "eval_re": [381.089057128778, 381.089057128778, 
381.089057128778, 381.089057128778, 381.089057128778, 381.089057128778, 
381.089057128778, 381.089057128778, 381.089057128778, 381.089057128778], 
"eval_len": [159, 159, 159, 159, 159, 159, 159, 159, 159, 159]}

 86%|████████▌ | 859997/1000000 [14:59:41<1:47:19, 21.74it/s]global step 860000, trans_decision ep_re 1017.8098444250339

{"global_step": 860000, "eval_re": [1017.809844425034, 1017.809844425034, 
1017.809844425034, 1017.809844425034, 1017.809844425034, 1017.809844425034, 
1017.809844425034, 1017.809844425034, 1017.809844425034, 1017.809844425034], 
"eval_len": [340, 340, 340, 340, 340, 340, 340, 340, 340, 340]}

 87%|████████▋ | 869997/1000000 [15:10:11<1:39:53, 21.69it/s]global step 870000, trans_decision ep_re 2452.6061644500037

{"global_step": 870000, "eval_re": [2452.6061644500037, 2452.6061644500037, 
2452.6061644500037, 2452.6061644500037, 2452.6061644500037, 2452.6061644500037, 
2452.6061644500037, 2452.6061644500037, 2452.6061644500037, 2452.6061644500037],
"eval_len": [761, 761, 761, 761, 761, 761, 761, 761, 761, 761]}

 88%|████████▊ | 879999/1000000 [15:20:51<1:31:45, 21.80it/s]global step 880000, trans_decision ep_re 220.27182903105313

{"global_step": 880000, "eval_re": [220.27182903105313, 220.27182903105313, 
220.27182903105313, 220.27182903105313, 220.27182903105313, 220.27182903105313, 
220.27182903105313, 220.27182903105313, 220.27182903105313, 220.27182903105313],
"eval_len": [113, 113, 113, 113, 113, 113, 113, 113, 113, 113]}

 89%|████████▉ | 889999/1000000 [15:31:21<1:24:12, 21.77it/s]global step 890000, trans_decision ep_re 746.3703153376317

{"global_step": 890000, "eval_re": [746.3703153376317, 746.3703153376317, 
746.3703153376317, 746.3703153376317, 746.3703153376317, 746.3703153376317, 
746.3703153376317, 746.3703153376317, 746.3703153376317, 746.3703153376317], 
"eval_len": [257, 257, 257, 257, 257, 257, 257, 257, 257, 257]}

 90%|████████▉ | 899999/1000000 [15:41:41<1:16:26, 21.80it/s]global step 900000, trans_decision ep_re 764.9748082936519

{"global_step": 900000, "eval_re": [764.9748082936518, 764.9748082936518, 
764.9748082936518, 764.9748082936518, 764.9748082936518, 764.9748082936518, 
764.9748082936518, 764.9748082936518, 764.9748082936518, 764.9748082936518], 
"eval_len": [253, 253, 253, 253, 253, 253, 253, 253, 253, 253]}

 91%|█████████ | 909997/1000000 [15:52:11<1:08:46, 21.81it/s]global step 910000, trans_decision ep_re 240.3160085783461

{"global_step": 910000, "eval_re": [240.3160085783461, 240.3160085783461, 
240.3160085783461, 240.3160085783461, 240.3160085783461, 240.3160085783461, 
240.3160085783461, 240.3160085783461, 240.3160085783461, 240.3160085783461], 
"eval_len": [120, 120, 120, 120, 120, 120, 120, 120, 120, 120]}

 92%|█████████▏| 919999/1000000 [16:02:31<1:01:08, 21.81it/s]global step 920000, trans_decision ep_re 1743.4687278928075

{"global_step": 920000, "eval_re": [1743.4687278928075, 1743.4687278928075, 
1743.4687278928075, 1743.4687278928075, 1743.4687278928075, 1743.4687278928075, 
1743.4687278928075, 1743.4687278928075, 1743.4687278928075, 1743.4687278928075],
"eval_len": [507, 507, 507, 507, 507, 507, 507, 507, 507, 507]}

 93%|█████████▎| 929999/1000000 [16:12:51<53:36, 21.76it/s]global step 930000, trans_decision ep_re 149.6937573069533

{"global_step": 930000, "eval_re": [149.6937573069533, 149.6937573069533, 
149.6937573069533, 149.6937573069533, 149.6937573069533, 149.6937573069533, 
149.6937573069533, 149.6937573069533, 149.6937573069533, 149.6937573069533], 
"eval_len": [92, 92, 92, 92, 92, 92, 92, 92, 92, 92]}

 94%|█████████▍| 939998/1000000 [16:23:31<45:24, 22.03it/s]global step 940000, trans_decision ep_re 269.2943716397582

{"global_step": 940000, "eval_re": [269.2943716397582, 269.2943716397582, 
269.2943716397582, 269.2943716397582, 269.2943716397582, 269.2943716397582, 
269.2943716397582, 269.2943716397582, 269.2943716397582, 269.2943716397582], 
"eval_len": [132, 132, 132, 132, 132, 132, 132, 132, 132, 132]}

 95%|█████████▍| 949997/1000000 [16:33:51<38:31, 21.63it/s]global step 950000, trans_decision ep_re 2327.8725551698617

{"global_step": 950000, "eval_re": [2327.8725551698612, 2327.8725551698612, 
2327.8725551698612, 2327.8725551698612, 2327.8725551698612, 2327.8725551698612, 
2327.8725551698612, 2327.8725551698612, 2327.8725551698612, 2327.8725551698612],
"eval_len": [664, 664, 664, 664, 664, 664, 664, 664, 664, 664]}

 96%|█████████▌| 959997/1000000 [16:44:31<30:43, 21.70it/s]global step 960000, trans_decision ep_re 307.41604539859196

{"global_step": 960000, "eval_re": [307.41604539859196, 307.41604539859196, 
307.41604539859196, 307.41604539859196, 307.41604539859196, 307.41604539859196, 
307.41604539859196, 307.41604539859196, 307.41604539859196, 307.41604539859196],
"eval_len": [138, 138, 138, 138, 138, 138, 138, 138, 138, 138]}

 97%|█████████▋| 969999/1000000 [16:54:51<22:59, 21.75it/s]global step 970000, trans_decision ep_re 1334.508821651793

{"global_step": 970000, "eval_re": [1334.5088216517931, 1334.5088216517931, 
1334.5088216517931, 1334.5088216517931, 1334.5088216517931, 1334.5088216517931, 
1334.5088216517931, 1334.5088216517931, 1334.5088216517931, 1334.5088216517931],
"eval_len": [399, 399, 399, 399, 399, 399, 399, 399, 399, 399]}

 98%|█████████▊| 979998/1000000 [17:05:31<15:19, 21.75it/s]global step 980000, trans_decision ep_re 222.93099251628183

{"global_step": 980000, "eval_re": [222.9309925162818, 222.9309925162818, 
222.9309925162818, 222.9309925162818, 222.9309925162818, 222.9309925162818, 
222.9309925162818, 222.9309925162818, 222.9309925162818, 222.9309925162818], 
"eval_len": [114, 114, 114, 114, 114, 114, 114, 114, 114, 114]}

 99%|█████████▉| 989999/1000000 [17:15:51<07:43, 21.59it/s]global step 990000, trans_decision ep_re 595.5092154536997

{"global_step": 990000, "eval_re": [595.5092154536997, 595.5092154536997, 
595.5092154536997, 595.5092154536997, 595.5092154536997, 595.5092154536997, 
595.5092154536997, 595.5092154536997, 595.5092154536997, 595.5092154536997], 
"eval_len": [211, 211, 211, 211, 211, 211, 211, 211, 211, 211]}

100%|█████████▉| 999999/1000000 [17:26:31<00:00, 21.58it/s]global step 1000000, trans_decision ep_re 439.36899928251387

{"global_step": 1000000, "eval_re": [439.36899928251387, 439.36899928251387, 
439.36899928251387, 439.36899928251387, 439.36899928251387, 439.36899928251387, 
439.36899928251387, 439.36899928251387, 439.36899928251387, 439.36899928251387],
"eval_len": [179, 179, 179, 179, 179, 179, 179, 179, 179, 179]}

100%|██████████| 1000000/1000000 [17:26:34<00:00, 15.93it/s]
