
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.0
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9996/1000000 [03:21<8:23:56, 32.74it/s]global step 10000, trans_decision ep_re 142.636985345224

{"global_step": 10000, "eval_re": [142.63698534522396, 142.63698534522396, 
142.63698534522396, 142.63698534522396, 142.63698534522396, 142.63698534522396, 
142.63698534522396, 142.63698534522396, 142.63698534522396, 142.63698534522396],
"eval_len": [103, 103, 103, 103, 103, 103, 103, 103, 103, 103]}

  2%|▏         | 19997/1000000 [10:03<8:28:09, 32.14it/s]global step 20000, trans_decision ep_re 169.16159395582514

{"global_step": 20000, "eval_re": [169.16159395582517, 169.16159395582517, 
169.16159395582517, 169.16159395582517, 169.16159395582517, 169.16159395582517, 
169.16159395582517, 169.16159395582517, 169.16159395582517, 169.16159395582517],
"eval_len": [133, 133, 133, 133, 133, 133, 133, 133, 133, 133]}

  3%|▎         | 29999/1000000 [17:00<8:21:52, 32.21it/s]global step 30000, trans_decision ep_re 239.37679922644574

{"global_step": 30000, "eval_re": [218.91200807886597, 423.55991955466334, 
218.91200807886597, 218.91200807886597, 218.91200807886597, 218.91200807886597, 
218.91200807886597, 218.91200807886597, 218.91200807886597, 218.91200807886597],
"eval_len": [187, 444, 187, 187, 187, 187, 187, 187, 187, 187]}

  4%|▍         | 39997/1000000 [23:40<8:09:55, 32.66it/s]global step 40000, trans_decision ep_re 91.73849717724457

{"global_step": 40000, "eval_re": [91.73849717724457, 91.73849717724457, 
91.73849717724457, 91.73849717724457, 91.73849717724457, 91.73849717724457, 
91.73849717724457, 91.73849717724457, 91.73849717724457, 91.73849717724457], 
"eval_len": [183, 183, 183, 183, 183, 183, 183, 183, 183, 183]}

  5%|▍         | 49997/1000000 [30:12<8:10:57, 32.25it/s]global step 50000, trans_decision ep_re 33.7334202189868

{"global_step": 50000, "eval_re": [33.7334202189868, 33.7334202189868, 
33.7334202189868, 33.7334202189868, 33.7334202189868, 33.7334202189868, 
33.7334202189868, 33.7334202189868, 33.7334202189868, 33.7334202189868], 
"eval_len": [35, 35, 35, 35, 35, 35, 35, 35, 35, 35]}

  6%|▌         | 59997/1000000 [36:51<8:04:07, 32.36it/s]global step 60000, trans_decision ep_re 34.94247802333287

{"global_step": 60000, "eval_re": [34.94247802333287, 34.94247802333287, 
34.94247802333287, 34.94247802333287, 34.94247802333287, 34.94247802333287, 
34.94247802333287, 34.94247802333287, 34.94247802333287, 34.94247802333287], 
"eval_len": [143, 143, 143, 143, 143, 143, 143, 143, 143, 143]}

  7%|▋         | 69996/1000000 [43:32<8:03:42, 32.04it/s]global step 70000, trans_decision ep_re 534.6008124492357

{"global_step": 70000, "eval_re": [534.6008124492357, 534.6008124492357, 
534.6008124492357, 534.6008124492357, 534.6008124492357, 534.6008124492357, 
534.6008124492357, 534.6008124492357, 534.6008124492357, 534.6008124492357], 
"eval_len": [223, 223, 223, 223, 223, 223, 223, 223, 223, 223]}

  8%|▊         | 79997/1000000 [50:30<7:57:46, 32.09it/s]global step 80000, trans_decision ep_re 683.6098384881295

{"global_step": 80000, "eval_re": [683.6098384881294, 683.6098384881294, 
683.6098384881294, 683.6098384881294, 683.6098384881294, 683.6098384881294, 
683.6098384881294, 683.6098384881294, 683.6098384881294, 683.6098384881294], 
"eval_len": [251, 251, 251, 251, 251, 251, 251, 251, 251, 251]}

  9%|▉         | 89997/1000000 [57:03<7:45:47, 32.56it/s]global step 90000, trans_decision ep_re 255.74526815160976

{"global_step": 90000, "eval_re": [255.74526815160976, 255.74526815160976, 
255.74526815160976, 255.74526815160976, 255.74526815160976, 255.74526815160976, 
255.74526815160976, 255.74526815160976, 255.74526815160976, 255.74526815160976],
"eval_len": [137, 137, 137, 137, 137, 137, 137, 137, 137, 137]}

 10%|▉         | 99999/1000000 [1:04:00<7:46:33, 32.15it/s]global step 100000, trans_decision ep_re 753.6803017375348

{"global_step": 100000, "eval_re": [753.6803017375348, 753.6803017375348, 
753.6803017375348, 753.6803017375348, 753.6803017375348, 753.6803017375348, 
753.6803017375348, 753.6803017375348, 753.6803017375348, 753.6803017375348], 
"eval_len": [272, 272, 272, 272, 272, 272, 272, 272, 272, 272]}

 11%|█         | 109997/1000000 [1:10:50<7:34:13, 32.66it/s]global step 110000, trans_decision ep_re 1214.391202326177

{"global_step": 110000, "eval_re": [1214.3912023261773, 1214.3912023261773, 
1214.3912023261773, 1214.3912023261773, 1214.3912023261773, 1214.3912023261773, 
1214.3912023261773, 1214.3912023261773, 1214.3912023261773, 1214.3912023261773],
"eval_len": [379, 379, 379, 379, 379, 379, 379, 379, 379, 379]}

 12%|█▏        | 119997/1000000 [1:17:30<7:26:40, 32.84it/s]global step 120000, trans_decision ep_re 403.8746335774996

{"global_step": 120000, "eval_re": [403.8746335774996, 403.8746335774996, 
403.8746335774996, 403.8746335774996, 403.8746335774996, 403.8746335774996, 
403.8746335774996, 403.8746335774996, 403.8746335774996, 403.8746335774996], 
"eval_len": [188, 188, 188, 188, 188, 188, 188, 188, 188, 188]}

 13%|█▎        | 129997/1000000 [1:24:10<7:21:08, 32.87it/s]global step 130000, trans_decision ep_re 1191.265898790054

{"global_step": 130000, "eval_re": [1191.265898790054, 1191.265898790054, 
1191.265898790054, 1191.265898790054, 1191.265898790054, 1191.265898790054, 
1191.265898790054, 1191.265898790054, 1191.265898790054, 1191.265898790054], 
"eval_len": [333, 333, 333, 333, 333, 333, 333, 333, 333, 333]}

 14%|█▍        | 139997/1000000 [1:30:43<7:24:41, 32.23it/s]global step 140000, trans_decision ep_re 61.04317859710884

{"global_step": 140000, "eval_re": [61.04317859710884, 61.04317859710884, 
61.04317859710884, 61.04317859710884, 61.04317859710884, 61.04317859710884, 
61.04317859710884, 61.04317859710884, 61.04317859710884, 61.04317859710884], 
"eval_len": [82, 82, 82, 82, 82, 82, 82, 82, 82, 82]}

 15%|█▍        | 149996/1000000 [1:37:30<7:11:12, 32.85it/s]global step 150000, trans_decision ep_re 1902.1790473239344

{"global_step": 150000, "eval_re": [1902.1790473239344, 1902.1790473239344, 
1902.1790473239344, 1902.1790473239344, 1902.1790473239344, 1902.1790473239344, 
1902.1790473239344, 1902.1790473239344, 1902.1790473239344, 1902.1790473239344],
"eval_len": [513, 513, 513, 513, 513, 513, 513, 513, 513, 513]}

 16%|█▌        | 159997/1000000 [1:44:20<7:06:31, 32.82it/s]global step 160000, trans_decision ep_re 1615.0295652261443

{"global_step": 160000, "eval_re": [1615.029565226144, 1615.029565226144, 
1615.029565226144, 1615.029565226144, 1615.029565226144, 1615.029565226144, 
1615.029565226144, 1615.029565226144, 1615.029565226144, 1615.029565226144], 
"eval_len": [421, 421, 421, 421, 421, 421, 421, 421, 421, 421]}

 17%|█▋        | 169997/1000000 [1:51:10<7:08:59, 32.25it/s]global step 170000, trans_decision ep_re 83.1749432385991

{"global_step": 170000, "eval_re": [83.1749432385991, 83.1749432385991, 
83.1749432385991, 83.1749432385991, 83.1749432385991, 83.1749432385991, 
83.1749432385991, 83.1749432385991, 83.1749432385991, 83.1749432385991], 
"eval_len": [92, 92, 92, 92, 92, 92, 92, 92, 92, 92]}

 18%|█▊        | 179997/1000000 [1:57:41<7:03:32, 32.27it/s]global step 180000, trans_decision ep_re 260.48227405932226

{"global_step": 180000, "eval_re": [260.48227405932226, 260.48227405932226, 
260.48227405932226, 260.48227405932226, 260.48227405932226, 260.48227405932226, 
260.48227405932226, 260.48227405932226, 260.48227405932226, 260.48227405932226],
"eval_len": [182, 182, 182, 182, 182, 182, 182, 182, 182, 182]}

 19%|█▉        | 189999/1000000 [2:04:40<6:51:04, 32.84it/s]global step 190000, trans_decision ep_re 2860.3513898005954

{"global_step": 190000, "eval_re": [2860.3513898005954, 2860.3513898005954, 
2860.3513898005954, 2860.3513898005954, 2860.3513898005954, 2860.3513898005954, 
2860.3513898005954, 2860.3513898005954, 2860.3513898005954, 2860.3513898005954],
"eval_len": [685, 685, 685, 685, 685, 685, 685, 685, 685, 685]}

 20%|█▉        | 199997/1000000 [2:11:30<6:45:31, 32.88it/s]global step 200000, trans_decision ep_re 3297.0223510187425

{"global_step": 200000, "eval_re": [3297.0223510187425, 3297.0223510187425, 
3297.0223510187425, 3297.0223510187425, 3297.0223510187425, 3297.0223510187425, 
3297.0223510187425, 3297.0223510187425, 3297.0223510187425, 3297.0223510187425],
"eval_len": [724, 724, 724, 724, 724, 724, 724, 724, 724, 724]}

 21%|██        | 209997/1000000 [2:18:20<6:48:39, 32.22it/s]global step 210000, trans_decision ep_re 206.7803907301146

{"global_step": 210000, "eval_re": [206.78039073011462, 206.78039073011462, 
206.78039073011462, 206.78039073011462, 206.78039073011462, 206.78039073011462, 
206.78039073011462, 206.78039073011462, 206.78039073011462, 206.78039073011462],
"eval_len": [228, 228, 228, 228, 228, 228, 228, 228, 228, 228]}

 22%|██▏       | 219999/1000000 [2:25:20<6:42:50, 32.27it/s]global step 220000, trans_decision ep_re 619.9289427655665

{"global_step": 220000, "eval_re": [619.9289427655665, 619.9289427655665, 
619.9289427655665, 619.9289427655665, 619.9289427655665, 619.9289427655665, 
619.9289427655665, 619.9289427655665, 619.9289427655665, 619.9289427655665], 
"eval_len": [397, 397, 397, 397, 397, 397, 397, 397, 397, 397]}

 23%|██▎       | 229997/1000000 [2:32:10<6:38:57, 32.17it/s]global step 230000, trans_decision ep_re 1993.357796771315

{"global_step": 230000, "eval_re": [1993.3577967713152, 1993.3577967713152, 
1993.3577967713152, 1993.3577967713152, 1993.3577967713152, 1993.3577967713152, 
1993.3577967713152, 1993.3577967713152, 1993.3577967713152, 1993.3577967713152],
"eval_len": [510, 510, 510, 510, 510, 510, 510, 510, 510, 510]}

 24%|██▍       | 239997/1000000 [2:39:10<6:36:39, 31.93it/s]global step 240000, trans_decision ep_re 1593.7450011502913

{"global_step": 240000, "eval_re": [1593.7450011502913, 1593.7450011502913, 
1593.7450011502913, 1593.7450011502913, 1593.7450011502913, 1593.7450011502913, 
1593.7450011502913, 1593.7450011502913, 1593.7450011502913, 1593.7450011502913],
"eval_len": [410, 410, 410, 410, 410, 410, 410, 410, 410, 410]}

 25%|██▍       | 249997/1000000 [2:46:10<6:31:45, 31.91it/s]global step 250000, trans_decision ep_re 2679.698832835776

{"global_step": 250000, "eval_re": [2679.698832835776, 2679.698832835776, 
2679.698832835776, 2679.698832835776, 2679.698832835776, 2679.698832835776, 
2679.698832835776, 2679.698832835776, 2679.698832835776, 2679.698832835776], 
"eval_len": [635, 635, 635, 635, 635, 635, 635, 635, 635, 635]}

 26%|██▌       | 259997/1000000 [2:52:52<6:25:02, 32.03it/s]global step 260000, trans_decision ep_re 76.0736329427489

{"global_step": 260000, "eval_re": [76.0736329427489, 76.0736329427489, 
76.0736329427489, 76.0736329427489, 76.0736329427489, 76.0736329427489, 
76.0736329427489, 76.0736329427489, 76.0736329427489, 76.0736329427489], 
"eval_len": [96, 96, 96, 96, 96, 96, 96, 96, 96, 96]}

 27%|██▋       | 269999/1000000 [2:59:50<6:17:06, 32.26it/s]global step 270000, trans_decision ep_re 2066.202643507403

{"global_step": 270000, "eval_re": [2066.202643507403, 2066.202643507403, 
2066.202643507403, 2066.202643507403, 2066.202643507403, 2066.202643507403, 
2066.202643507403, 2066.202643507403, 2066.202643507403, 2066.202643507403], 
"eval_len": [584, 584, 584, 584, 584, 584, 584, 584, 584, 584]}

 28%|██▊       | 279997/1000000 [3:06:50<6:11:45, 32.28it/s]global step 280000, trans_decision ep_re 1985.786160053638

{"global_step": 280000, "eval_re": [1905.3279103287957, 2726.4665053237713, 
1905.3279103287957, 1905.3279103287957, 1905.3279103287957, 1905.3279103287957, 
1888.771812582243, 1905.3279103287957, 1905.3279103287957, 1905.3279103287957], 
"eval_len": [597, 731, 597, 597, 597, 597, 591, 597, 597, 597]}

 29%|██▉       | 289997/1000000 [3:13:50<6:07:49, 32.17it/s]global step 290000, trans_decision ep_re 4779.484147709877

{"global_step": 290000, "eval_re": [4916.332262936916, 4916.332262936916, 
4916.332262936916, 4916.332262936916, 4916.332262936916, 4916.332262936916, 
4916.332262936916, 4916.332262936916, 3547.851110666532, 4916.332262936916], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 757, 1000]}

 30%|██▉       | 299997/1000000 [3:21:00<6:02:41, 32.17it/s]global step 300000, trans_decision ep_re 3096.1268742795246

{"global_step": 300000, "eval_re": [3096.1268742795246, 3096.1268742795246, 
3096.1268742795246, 3096.1268742795246, 3096.1268742795246, 3096.1268742795246, 
3096.1268742795246, 3096.1268742795246, 3096.1268742795246, 3096.1268742795246],
"eval_len": [690, 690, 690, 690, 690, 690, 690, 690, 690, 690]}

 31%|███       | 309997/1000000 [3:28:10<6:00:07, 31.93it/s]global step 310000, trans_decision ep_re 2897.4529359732087

{"global_step": 310000, "eval_re": [2897.4529359732087, 2897.4529359732087, 
2897.4529359732087, 2897.4529359732087, 2897.4529359732087, 2897.4529359732087, 
2897.4529359732087, 2897.4529359732087, 2897.4529359732087, 2897.4529359732087],
"eval_len": [646, 646, 646, 646, 646, 646, 646, 646, 646, 646]}

 32%|███▏      | 319997/1000000 [3:35:10<5:51:28, 32.24it/s]global step 320000, trans_decision ep_re 1272.1029926508352

{"global_step": 320000, "eval_re": [1272.1029926508354, 1272.1029926508354, 
1272.1029926508354, 1272.1029926508354, 1272.1029926508354, 1272.1029926508354, 
1272.1029926508354, 1272.1029926508354, 1272.1029926508354, 1272.1029926508354],
"eval_len": [391, 391, 391, 391, 391, 391, 391, 391, 391, 391]}

 33%|███▎      | 329997/1000000 [3:42:00<5:48:47, 32.02it/s]global step 330000, trans_decision ep_re 1639.9380832295624

{"global_step": 330000, "eval_re": [1639.9380832295626, 1639.9380832295626, 
1639.9380832295626, 1639.9380832295626, 1639.9380832295626, 1639.9380832295626, 
1639.9380832295626, 1639.9380832295626, 1639.9380832295626, 1639.9380832295626],
"eval_len": [427, 427, 427, 427, 427, 427, 427, 427, 427, 427]}

 34%|███▍      | 339997/1000000 [3:49:00<5:39:45, 32.38it/s]global step 340000, trans_decision ep_re 2405.41270297796

{"global_step": 340000, "eval_re": [2405.4127029779606, 2405.4127029779606, 
2405.4127029779606, 2405.4127029779606, 2405.4127029779606, 2405.4127029779606, 
2405.4127029779606, 2405.4127029779606, 2405.4127029779606, 2405.4127029779606],
"eval_len": [620, 620, 620, 620, 620, 620, 620, 620, 620, 620]}

 35%|███▍      | 349997/1000000 [3:56:00<5:35:59, 32.24it/s]global step 350000, trans_decision ep_re 918.1528697751617

{"global_step": 350000, "eval_re": [918.1528697751617, 918.1528697751617, 
918.1528697751617, 918.1528697751617, 918.1528697751617, 918.1528697751617, 
918.1528697751617, 918.1528697751617, 918.1528697751617, 918.1528697751617], 
"eval_len": [259, 259, 259, 259, 259, 259, 259, 259, 259, 259]}

 36%|███▌      | 359997/1000000 [4:02:36<5:27:44, 32.55it/s]global step 360000, trans_decision ep_re 56.60429083900213

{"global_step": 360000, "eval_re": [56.60429083900213, 56.60429083900213, 
56.60429083900213, 56.60429083900213, 56.60429083900213, 56.60429083900213, 
56.60429083900213, 56.60429083900213, 56.60429083900213, 56.60429083900213], 
"eval_len": [82, 82, 82, 82, 82, 82, 82, 82, 82, 82]}

 37%|███▋      | 369999/1000000 [4:09:21<5:30:45, 31.74it/s]global step 370000, trans_decision ep_re 79.76800740970086

{"global_step": 370000, "eval_re": [79.76800740970086, 79.76800740970086, 
79.76800740970086, 79.76800740970086, 79.76800740970086, 79.76800740970086, 
79.76800740970086, 79.76800740970086, 79.76800740970086, 79.76800740970086], 
"eval_len": [92, 92, 92, 92, 92, 92, 92, 92, 92, 92]}

 38%|███▊      | 379998/1000000 [4:16:20<5:22:09, 32.08it/s]global step 380000, trans_decision ep_re 4631.3828980422895

{"global_step": 380000, "eval_re": [4634.778213888557, 4634.778213888557, 
4634.778213888557, 4634.778213888557, 4634.778213888557, 4634.778213888557, 
4634.778213888557, 4634.778213888557, 4634.778213888557, 4600.825055425883], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389997/1000000 [4:23:30<5:19:15, 31.84it/s]global step 390000, trans_decision ep_re 1517.9496364976071

{"global_step": 390000, "eval_re": [1517.949636497607, 1517.949636497607, 
1517.949636497607, 1517.949636497607, 1517.949636497607, 1517.949636497607, 
1517.949636497607, 1517.949636497607, 1517.949636497607, 1517.949636497607], 
"eval_len": [372, 372, 372, 372, 372, 372, 372, 372, 372, 372]}

 40%|███▉      | 399997/1000000 [4:30:15<5:13:02, 31.95it/s]global step 400000, trans_decision ep_re 174.94918387673897

{"global_step": 400000, "eval_re": [174.94918387673897, 174.94918387673897, 
174.94918387673897, 174.94918387673897, 174.94918387673897, 174.94918387673897, 
174.94918387673897, 174.94918387673897, 174.94918387673897, 174.94918387673897],
"eval_len": [114, 114, 114, 114, 114, 114, 114, 114, 114, 114]}

 41%|████      | 409999/1000000 [4:37:01<5:06:52, 32.04it/s]global step 410000, trans_decision ep_re 25.39478149335071

{"global_step": 410000, "eval_re": [25.39478149335071, 25.39478149335071, 
25.39478149335071, 25.39478149335071, 25.39478149335071, 25.39478149335071, 
25.39478149335071, 25.39478149335071, 25.39478149335071, 25.39478149335071], 
"eval_len": [36, 36, 36, 36, 36, 36, 36, 36, 36, 36]}

 42%|████▏     | 419999/1000000 [4:44:00<5:04:06, 31.79it/s]global step 420000, trans_decision ep_re 1387.2480775564522

{"global_step": 420000, "eval_re": [1447.8920986056341, 1447.8920986056341, 
1447.8920986056341, 1447.8920986056341, 1447.8920986056341, 1447.8920986056341, 
1447.8920986056341, 1447.8920986056341, 841.4518881138139, 1447.8920986056341], 
"eval_len": [361, 361, 361, 361, 361, 361, 361, 361, 261, 361]}

 43%|████▎     | 429997/1000000 [4:50:50<4:56:55, 32.00it/s]global step 430000, trans_decision ep_re 1753.0614215231376

{"global_step": 430000, "eval_re": [1753.0614215231376, 1753.0614215231376, 
1753.0614215231376, 1753.0614215231376, 1753.0614215231376, 1753.0614215231376, 
1753.0614215231376, 1753.0614215231376, 1753.0614215231376, 1753.0614215231376],
"eval_len": [437, 437, 437, 437, 437, 437, 437, 437, 437, 437]}

 44%|████▍     | 439997/1000000 [4:57:50<4:51:17, 32.04it/s]global step 440000, trans_decision ep_re 4745.062643320219

{"global_step": 440000, "eval_re": [4745.062643320219, 4745.062643320219, 
4745.062643320219, 4745.062643320219, 4745.062643320219, 4745.062643320219, 
4745.062643320219, 4745.062643320219, 4745.062643320219, 4745.062643320219], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449997/1000000 [5:05:00<4:47:25, 31.89it/s]global step 450000, trans_decision ep_re 2931.730115152685

{"global_step": 450000, "eval_re": [2931.730115152685, 2931.730115152685, 
2931.730115152685, 2931.730115152685, 2931.730115152685, 2931.730115152685, 
2931.730115152685, 2931.730115152685, 2931.730115152685, 2931.730115152685], 
"eval_len": [657, 657, 657, 657, 657, 657, 657, 657, 657, 657]}

 46%|████▌     | 459997/1000000 [5:11:41<4:40:09, 32.12it/s]global step 460000, trans_decision ep_re 221.8034690793658

{"global_step": 460000, "eval_re": [221.8034690793658, 221.8034690793658, 
221.8034690793658, 221.8034690793658, 221.8034690793658, 221.8034690793658, 
221.8034690793658, 221.8034690793658, 221.8034690793658, 221.8034690793658], 
"eval_len": [143, 143, 143, 143, 143, 143, 143, 143, 143, 143]}

 47%|████▋     | 469999/1000000 [5:18:40<4:33:12, 32.33it/s]global step 470000, trans_decision ep_re 618.597432559992

{"global_step": 470000, "eval_re": [618.597432559992, 618.597432559992, 
618.597432559992, 618.597432559992, 618.597432559992, 618.597432559992, 
618.597432559992, 618.597432559992, 618.597432559992, 618.597432559992], 
"eval_len": [207, 207, 207, 207, 207, 207, 207, 207, 207, 207]}

 48%|████▊     | 479997/1000000 [5:25:20<4:28:11, 32.32it/s]global step 480000, trans_decision ep_re 2420.285747425753

{"global_step": 480000, "eval_re": [2420.2857474257526, 2420.2857474257526, 
2420.2857474257526, 2420.2857474257526, 2420.2857474257526, 2420.2857474257526, 
2420.2857474257526, 2420.2857474257526, 2420.2857474257526, 2420.2857474257526],
"eval_len": [584, 584, 584, 584, 584, 584, 584, 584, 584, 584]}

 49%|████▉     | 489997/1000000 [5:32:20<4:24:26, 32.14it/s]global step 490000, trans_decision ep_re 1134.8630625268156

{"global_step": 490000, "eval_re": [1134.8630625268156, 1134.8630625268156, 
1134.8630625268156, 1134.8630625268156, 1134.8630625268156, 1134.8630625268156, 
1134.8630625268156, 1134.8630625268156, 1134.8630625268156, 1134.8630625268156],
"eval_len": [339, 339, 339, 339, 339, 339, 339, 339, 339, 339]}

 50%|████▉     | 499997/1000000 [5:39:10<4:19:00, 32.17it/s]global step 500000, trans_decision ep_re 1445.1123706729957

{"global_step": 500000, "eval_re": [1445.1123706729957, 1445.1123706729957, 
1445.1123706729957, 1445.1123706729957, 1445.1123706729957, 1445.1123706729957, 
1445.1123706729957, 1445.1123706729957, 1445.1123706729957, 1445.1123706729957],
"eval_len": [415, 415, 415, 415, 415, 415, 415, 415, 415, 415]}

 51%|█████     | 509997/1000000 [5:46:10<4:15:31, 31.96it/s]global step 510000, trans_decision ep_re 4860.502072690169

{"global_step": 510000, "eval_re": [4860.5020726901685, 4860.5020726901685, 
4860.5020726901685, 4860.5020726901685, 4860.5020726901685, 4860.5020726901685, 
4860.5020726901685, 4860.5020726901685, 4860.5020726901685, 4860.5020726901685],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519997/1000000 [5:53:20<4:12:18, 31.71it/s]global step 520000, trans_decision ep_re 2285.947886983023

{"global_step": 520000, "eval_re": [2285.947886983023, 2285.947886983023, 
2285.947886983023, 2285.947886983023, 2285.947886983023, 2285.947886983023, 
2285.947886983023, 2285.947886983023, 2285.947886983023, 2285.947886983023], 
"eval_len": [560, 560, 560, 560, 560, 560, 560, 560, 560, 560]}

 53%|█████▎    | 529997/1000000 [6:00:20<4:05:19, 31.93it/s]global step 530000, trans_decision ep_re 1992.7083226110772

{"global_step": 530000, "eval_re": [2720.852545739125, 1911.8034089301832, 
1911.8034089301832, 1911.8034089301832, 1911.8034089301832, 1911.8034089301832, 
1911.8034089301832, 1911.8034089301832, 1911.8034089301832, 1911.8034089301832],
"eval_len": [613, 454, 454, 454, 454, 454, 454, 454, 454, 454]}

 54%|█████▍    | 539997/1000000 [6:07:05<4:05:00, 31.29it/s]global step 540000, trans_decision ep_re 24.60995930717297

{"global_step": 540000, "eval_re": [24.60995930717297, 24.60995930717297, 
24.60995930717297, 24.60995930717297, 24.60995930717297, 24.60995930717297, 
24.60995930717297, 24.60995930717297, 24.60995930717297, 24.60995930717297], 
"eval_len": [35, 35, 35, 35, 35, 35, 35, 35, 35, 35]}

 55%|█████▍    | 549997/1000000 [6:14:00<3:53:28, 32.12it/s]global step 550000, trans_decision ep_re 3325.420666276166

{"global_step": 550000, "eval_re": [3325.420666276166, 3325.420666276166, 
3325.420666276166, 3325.420666276166, 3325.420666276166, 3325.420666276166, 
3325.420666276166, 3325.420666276166, 3325.420666276166, 3325.420666276166], 
"eval_len": [783, 783, 783, 783, 783, 783, 783, 783, 783, 783]}

 56%|█████▌    | 559997/1000000 [6:21:10<3:51:11, 31.72it/s]global step 560000, trans_decision ep_re 1887.6512901470876

{"global_step": 560000, "eval_re": [1887.6512901470876, 1887.6512901470876, 
1887.6512901470876, 1887.6512901470876, 1887.6512901470876, 1887.6512901470876, 
1887.6512901470876, 1887.6512901470876, 1887.6512901470876, 1887.6512901470876],
"eval_len": [471, 471, 471, 471, 471, 471, 471, 471, 471, 471]}

 57%|█████▋    | 569997/1000000 [6:28:10<3:46:44, 31.61it/s]global step 570000, trans_decision ep_re 1312.1219919318298

{"global_step": 570000, "eval_re": [1312.1219919318298, 1312.1219919318298, 
1312.1219919318298, 1312.1219919318298, 1312.1219919318298, 1312.1219919318298, 
1312.1219919318298, 1312.1219919318298, 1312.1219919318298, 1312.1219919318298],
"eval_len": [352, 352, 352, 352, 352, 352, 352, 352, 352, 352]}

 58%|█████▊    | 579997/1000000 [6:35:10<3:39:26, 31.90it/s]global step 580000, trans_decision ep_re 4924.864557142102

{"global_step": 580000, "eval_re": [4924.864557142102, 4924.864557142102, 
4924.864557142102, 4924.864557142102, 4924.864557142102, 4924.864557142102, 
4924.864557142102, 4924.864557142102, 4924.864557142102, 4924.864557142102], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589997/1000000 [6:42:06<3:35:19, 31.73it/s]global step 590000, trans_decision ep_re 116.71780256413074

{"global_step": 590000, "eval_re": [116.71780256413072, 116.71780256413072, 
116.71780256413072, 116.71780256413072, 116.71780256413072, 116.71780256413072, 
116.71780256413072, 116.71780256413072, 116.71780256413072, 116.71780256413072],
"eval_len": [107, 107, 107, 107, 107, 107, 107, 107, 107, 107]}

 60%|█████▉    | 599999/1000000 [6:49:10<3:28:25, 31.99it/s]global step 600000, trans_decision ep_re 1457.0771427401116

{"global_step": 600000, "eval_re": [1457.0771427401114, 1457.0771427401114, 
1457.0771427401114, 1457.0771427401114, 1457.0771427401114, 1457.0771427401114, 
1457.0771427401114, 1457.0771427401114, 1457.0771427401114, 1457.0771427401114],
"eval_len": [364, 364, 364, 364, 364, 364, 364, 364, 364, 364]}

 61%|██████    | 609997/1000000 [6:56:00<3:22:22, 32.12it/s]global step 610000, trans_decision ep_re 4625.478786753161

{"global_step": 610000, "eval_re": [4625.478786753161, 4625.478786753161, 
4625.478786753161, 4625.478786753161, 4625.478786753161, 4625.478786753161, 
4625.478786753161, 4625.478786753161, 4625.478786753161, 4625.478786753161], 
"eval_len": [959, 959, 959, 959, 959, 959, 959, 959, 959, 959]}

 62%|██████▏   | 619997/1000000 [7:02:54<3:17:32, 32.06it/s]global step 620000, trans_decision ep_re 30.972443291617147

{"global_step": 620000, "eval_re": [30.972443291617143, 30.972443291617143, 
30.972443291617143, 30.972443291617143, 30.972443291617143, 30.972443291617143, 
30.972443291617143, 30.972443291617143, 30.972443291617143, 30.972443291617143],
"eval_len": [35, 35, 35, 35, 35, 35, 35, 35, 35, 35]}

 63%|██████▎   | 629997/1000000 [7:09:50<3:11:03, 32.28it/s]global step 630000, trans_decision ep_re 3531.2299174578134

{"global_step": 630000, "eval_re": [3531.2299174578134, 3531.2299174578134, 
3531.2299174578134, 3531.2299174578134, 3531.2299174578134, 3531.2299174578134, 
3531.2299174578134, 3531.2299174578134, 3531.2299174578134, 3531.2299174578134],
"eval_len": [754, 754, 754, 754, 754, 754, 754, 754, 754, 754]}

 64%|██████▍   | 639997/1000000 [7:16:40<3:06:23, 32.19it/s]global step 640000, trans_decision ep_re 199.7110927661454

{"global_step": 640000, "eval_re": [199.7110927661454, 199.7110927661454, 
199.7110927661454, 199.7110927661454, 199.7110927661454, 199.7110927661454, 
199.7110927661454, 199.7110927661454, 199.7110927661454, 199.7110927661454], 
"eval_len": [127, 127, 127, 127, 127, 127, 127, 127, 127, 127]}

 65%|██████▍   | 649999/1000000 [7:23:40<3:02:55, 31.89it/s]global step 650000, trans_decision ep_re 1868.8846727650784

{"global_step": 650000, "eval_re": [1868.8846727650782, 1868.8846727650782, 
1868.8846727650782, 1868.8846727650782, 1868.8846727650782, 1868.8846727650782, 
1868.8846727650782, 1868.8846727650782, 1868.8846727650782, 1868.8846727650782],
"eval_len": [441, 441, 441, 441, 441, 441, 441, 441, 441, 441]}

 66%|██████▌   | 659997/1000000 [7:30:40<2:58:28, 31.75it/s]global step 660000, trans_decision ep_re 3326.4559933081428

{"global_step": 660000, "eval_re": [3326.4559933081428, 3326.4559933081428, 
3326.4559933081428, 3326.4559933081428, 3326.4559933081428, 3326.4559933081428, 
3326.4559933081428, 3326.4559933081428, 3326.4559933081428, 3326.4559933081428],
"eval_len": [672, 672, 672, 672, 672, 672, 672, 672, 672, 672]}

 67%|██████▋   | 669999/1000000 [7:37:25<2:53:46, 31.65it/s]global step 670000, trans_decision ep_re 148.00869991784268

{"global_step": 670000, "eval_re": [148.00869991784268, 148.00869991784268, 
148.00869991784268, 148.00869991784268, 148.00869991784268, 148.00869991784268, 
148.00869991784268, 148.00869991784268, 148.00869991784268, 148.00869991784268],
"eval_len": [101, 101, 101, 101, 101, 101, 101, 101, 101, 101]}

 68%|██████▊   | 679997/1000000 [7:44:30<2:45:29, 32.23it/s]global step 680000, trans_decision ep_re 1768.0122625456238

{"global_step": 680000, "eval_re": [1768.012262545624, 1768.012262545624, 
1768.012262545624, 1768.012262545624, 1768.012262545624, 1768.012262545624, 
1768.012262545624, 1768.012262545624, 1768.012262545624, 1768.012262545624], 
"eval_len": [402, 402, 402, 402, 402, 402, 402, 402, 402, 402]}

 69%|██████▉   | 689997/1000000 [7:51:01<2:39:19, 32.43it/s]global step 690000, trans_decision ep_re 602.6447840745138

{"global_step": 690000, "eval_re": [602.6447840745138, 602.6447840745138, 
602.6447840745138, 602.6447840745138, 602.6447840745138, 602.6447840745138, 
602.6447840745138, 602.6447840745138, 602.6447840745138, 602.6447840745138], 
"eval_len": [208, 208, 208, 208, 208, 208, 208, 208, 208, 208]}

 70%|██████▉   | 699999/1000000 [7:57:45<2:34:54, 32.28it/s]global step 700000, trans_decision ep_re 103.79405320912403

{"global_step": 700000, "eval_re": [107.18777735418601, 107.18777735418601, 
73.25053590356633, 107.18777735418601, 107.18777735418601, 107.18777735418601, 
107.18777735418601, 107.18777735418601, 107.18777735418601, 107.18777735418601],
"eval_len": [121, 121, 80, 121, 121, 121, 121, 121, 121, 121]}

 71%|███████   | 709997/1000000 [8:04:40<2:31:35, 31.89it/s]global step 710000, trans_decision ep_re 3468.1564466158343

{"global_step": 710000, "eval_re": [3468.1564466158343, 3468.1564466158343, 
3468.1564466158343, 3468.1564466158343, 3468.1564466158343, 3468.1564466158343, 
3468.1564466158343, 3468.1564466158343, 3468.1564466158343, 3468.1564466158343],
"eval_len": [755, 755, 755, 755, 755, 755, 755, 755, 755, 755]}

 72%|███████▏  | 719997/1000000 [8:11:34<2:26:39, 31.82it/s]global step 720000, trans_decision ep_re 29.32801491228033

{"global_step": 720000, "eval_re": [29.328014912280334, 29.328014912280334, 
29.328014912280334, 29.328014912280334, 29.328014912280334, 29.328014912280334, 
29.328014912280334, 29.328014912280334, 29.328014912280334, 29.328014912280334],
"eval_len": [35, 35, 35, 35, 35, 35, 35, 35, 35, 35]}

 73%|███████▎  | 729997/1000000 [8:18:30<2:19:28, 32.27it/s]global step 730000, trans_decision ep_re 621.8974983248434

{"global_step": 730000, "eval_re": [621.8974983248434, 621.8974983248434, 
621.8974983248434, 621.8974983248434, 621.8974983248434, 621.8974983248434, 
621.8974983248434, 621.8974983248434, 621.8974983248434, 621.8974983248434], 
"eval_len": [210, 210, 210, 210, 210, 210, 210, 210, 210, 210]}

 74%|███████▍  | 739997/1000000 [8:25:20<2:15:57, 31.87it/s]global step 740000, trans_decision ep_re 2390.4340210627893

{"global_step": 740000, "eval_re": [2390.4340210627893, 2390.4340210627893, 
2390.4340210627893, 2390.4340210627893, 2390.4340210627893, 2390.4340210627893, 
2390.4340210627893, 2390.4340210627893, 2390.4340210627893, 2390.4340210627893],
"eval_len": [543, 543, 543, 543, 543, 543, 543, 543, 543, 543]}

 75%|███████▍  | 749997/1000000 [8:32:20<2:08:53, 32.33it/s]global step 750000, trans_decision ep_re 2427.4111460700337

{"global_step": 750000, "eval_re": [2427.4111460700337, 2427.4111460700337, 
2427.4111460700337, 2427.4111460700337, 2427.4111460700337, 2427.4111460700337, 
2427.4111460700337, 2427.4111460700337, 2427.4111460700337, 2427.4111460700337],
"eval_len": [513, 513, 513, 513, 513, 513, 513, 513, 513, 513]}

 76%|███████▌  | 759997/1000000 [8:38:54<2:02:35, 32.63it/s]global step 760000, trans_decision ep_re 31.126139602743745

{"global_step": 760000, "eval_re": [31.12613960274374, 31.12613960274374, 
31.12613960274374, 31.12613960274374, 31.12613960274374, 31.12613960274374, 
31.12613960274374, 31.12613960274374, 31.12613960274374, 31.12613960274374], 
"eval_len": [35, 35, 35, 35, 35, 35, 35, 35, 35, 35]}

 77%|███████▋  | 769997/1000000 [8:45:50<1:56:17, 32.96it/s]global step 770000, trans_decision ep_re 3336.058730258518

{"global_step": 770000, "eval_re": [3336.0587302585186, 3336.0587302585186, 
3336.0587302585186, 3336.0587302585186, 3336.0587302585186, 3336.0587302585186, 
3336.0587302585186, 3336.0587302585186, 3336.0587302585186, 3336.0587302585186],
"eval_len": [738, 738, 738, 738, 738, 738, 738, 738, 738, 738]}

 78%|███████▊  | 779997/1000000 [8:52:40<1:52:26, 32.61it/s]global step 780000, trans_decision ep_re 4786.417141243752

{"global_step": 780000, "eval_re": [4786.417141243752, 4786.417141243752, 
4786.417141243752, 4786.417141243752, 4786.417141243752, 4786.417141243752, 
4786.417141243752, 4786.417141243752, 4786.417141243752, 4786.417141243752], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789997/1000000 [8:59:50<1:47:17, 32.62it/s]global step 790000, trans_decision ep_re 5041.111242555109

{"global_step": 790000, "eval_re": [5041.111242555108, 5041.111242555108, 
5041.111242555108, 5041.111242555108, 5041.111242555108, 5041.111242555108, 
5041.111242555108, 5041.111242555108, 5041.111242555108, 5041.111242555108], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799997/1000000 [9:06:50<1:43:21, 32.25it/s]global step 800000, trans_decision ep_re 4796.552449245515

{"global_step": 800000, "eval_re": [4796.552449245515, 4796.552449245515, 
4796.552449245515, 4796.552449245515, 4796.552449245515, 4796.552449245515, 
4796.552449245515, 4796.552449245515, 4796.552449245515, 4796.552449245515], 
"eval_len": [976, 976, 976, 976, 976, 976, 976, 976, 976, 976]}

 81%|████████  | 809997/1000000 [9:13:45<1:37:22, 32.52it/s]global step 810000, trans_decision ep_re 32.27345897042845

{"global_step": 810000, "eval_re": [32.27345897042845, 32.27345897042845, 
32.27345897042845, 32.27345897042845, 32.27345897042845, 32.27345897042845, 
32.27345897042845, 32.27345897042845, 32.27345897042845, 32.27345897042845], 
"eval_len": [36, 36, 36, 36, 36, 36, 36, 36, 36, 36]}

 82%|████████▏ | 819997/1000000 [9:20:40<1:32:21, 32.48it/s]global step 820000, trans_decision ep_re 5202.618491452479

{"global_step": 820000, "eval_re": [5202.618491452479, 5202.618491452479, 
5202.618491452479, 5202.618491452479, 5202.618491452479, 5202.618491452479, 
5202.618491452479, 5202.618491452479, 5202.618491452479, 5202.618491452479], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829997/1000000 [9:27:28<1:27:43, 32.30it/s]global step 830000, trans_decision ep_re 33.13657709296799

{"global_step": 830000, "eval_re": [33.136577092968, 33.136577092968, 
33.136577092968, 33.136577092968, 33.136577092968, 33.136577092968, 
33.136577092968, 33.136577092968, 33.136577092968, 33.136577092968], "eval_len":
[36, 36, 36, 36, 36, 36, 36, 36, 36, 36]}

 84%|████████▍ | 839997/1000000 [9:34:20<1:20:41, 33.05it/s]global step 840000, trans_decision ep_re 1338.2296952073077

{"global_step": 840000, "eval_re": [1205.9348995794087, 1205.9348995794087, 
1205.9348995794087, 2528.8828558584, 1205.9348995794087, 1205.9348995794087, 
1205.9348995794087, 1205.9348995794087, 1205.9348995794087, 1205.9348995794087],
"eval_len": [341, 341, 341, 587, 341, 341, 341, 341, 341, 341]}

 85%|████████▍ | 849999/1000000 [9:40:50<1:16:33, 32.65it/s]global step 850000, trans_decision ep_re 60.66308026482809

{"global_step": 850000, "eval_re": [60.663080264828096, 60.663080264828096, 
60.663080264828096, 60.663080264828096, 60.663080264828096, 60.663080264828096, 
60.663080264828096, 60.663080264828096, 60.663080264828096, 60.663080264828096],
"eval_len": [157, 157, 157, 157, 157, 157, 157, 157, 157, 157]}

 86%|████████▌ | 859998/1000000 [9:47:50<1:11:03, 32.83it/s]global step 860000, trans_decision ep_re 2749.539364276365

{"global_step": 860000, "eval_re": [2749.5393642763647, 2749.5393642763647, 
2749.5393642763647, 2749.5393642763647, 2749.5393642763647, 2749.5393642763647, 
2749.5393642763647, 2749.5393642763647, 2749.5393642763647, 2749.5393642763647],
"eval_len": [559, 559, 559, 559, 559, 559, 559, 559, 559, 559]}

 87%|████████▋ | 869997/1000000 [9:54:24<1:07:04, 32.30it/s]global step 870000, trans_decision ep_re 438.1698953820961

{"global_step": 870000, "eval_re": [438.16989538209606, 438.16989538209606, 
438.16989538209606, 438.16989538209606, 438.16989538209606, 438.16989538209606, 
438.16989538209606, 438.16989538209606, 438.16989538209606, 438.16989538209606],
"eval_len": [168, 168, 168, 168, 168, 168, 168, 168, 168, 168]}

 88%|████████▊ | 879999/1000000 [10:01:20<1:00:40, 32.96it/s]global step 880000, trans_decision ep_re 3391.639647971572

{"global_step": 880000, "eval_re": [3391.639647971572, 3391.639647971572, 
3391.639647971572, 3391.639647971572, 3391.639647971572, 3391.639647971572, 
3391.639647971572, 3391.639647971572, 3391.639647971572, 3391.639647971572], 
"eval_len": [748, 748, 748, 748, 748, 748, 748, 748, 748, 748]}

 89%|████████▉ | 889997/1000000 [10:08:02<55:50, 32.83it/s]global step 890000, trans_decision ep_re 854.4281637976962

{"global_step": 890000, "eval_re": [762.4115681497364, 762.4115681497364, 
762.4115681497364, 762.4115681497364, 1682.5775246293365, 762.4115681497364, 
762.4115681497364, 762.4115681497364, 762.4115681497364, 762.4115681497364], 
"eval_len": [224, 224, 224, 224, 376, 224, 224, 224, 224, 224]}

 90%|████████▉ | 899996/1000000 [10:15:00<50:46, 32.83it/s]global step 900000, trans_decision ep_re 4595.18922760651

{"global_step": 900000, "eval_re": [4669.933172134841, 4669.933172134841, 
4669.933172134841, 4669.933172134841, 4669.933172134841, 4669.933172134841, 
4669.933172134841, 4669.933172134841, 4669.933172134841, 3922.493726851535], 
"eval_len": [924, 924, 924, 924, 924, 924, 924, 924, 924, 805]}

 91%|█████████ | 909997/1000000 [10:22:00<45:39, 32.86it/s]global step 910000, trans_decision ep_re 3685.957924180303

{"global_step": 910000, "eval_re": [3582.6464083177416, 3582.6464083177416, 
3582.6464083177416, 4615.761566943361, 3582.6464083177416, 3582.6464083177416, 
3582.6464083177416, 3582.6464083177416, 3582.6464083177416, 3582.6464083177416],
"eval_len": [710, 710, 710, 1000, 710, 710, 710, 710, 710, 710]}

 92%|█████████▏| 919997/1000000 [10:28:46<40:59, 32.52it/s]global step 920000, trans_decision ep_re 218.71361786343613

{"global_step": 920000, "eval_re": [218.71361786343616, 218.71361786343616, 
218.71361786343616, 218.71361786343616, 218.71361786343616, 218.71361786343616, 
218.71361786343616, 218.71361786343616, 218.71361786343616, 218.71361786343616],
"eval_len": [123, 123, 123, 123, 123, 123, 123, 123, 123, 123]}

 93%|█████████▎| 929999/1000000 [10:35:40<35:56, 32.46it/s]global step 930000, trans_decision ep_re 5104.732613580593

{"global_step": 930000, "eval_re": [5104.732613580593, 5104.732613580593, 
5104.732613580593, 5104.732613580593, 5104.732613580593, 5104.732613580593, 
5104.732613580593, 5104.732613580593, 5104.732613580593, 5104.732613580593], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939997/1000000 [10:42:32<30:52, 32.40it/s]global step 940000, trans_decision ep_re 148.9334193255755

{"global_step": 940000, "eval_re": [148.93341932557553, 148.93341932557553, 
148.93341932557553, 148.93341932557553, 148.93341932557553, 148.93341932557553, 
148.93341932557553, 148.93341932557553, 148.93341932557553, 148.93341932557553],
"eval_len": [153, 153, 153, 153, 153, 153, 153, 153, 153, 153]}

 95%|█████████▍| 949999/1000000 [10:49:30<25:51, 32.22it/s]global step 950000, trans_decision ep_re 1588.2426557186009

{"global_step": 950000, "eval_re": [1619.2916362039607, 1619.2916362039607, 
1619.2916362039607, 1619.2916362039607, 1308.801831350362, 1619.2916362039607, 
1619.2916362039607, 1619.2916362039607, 1619.2916362039607, 1619.2916362039607],
"eval_len": [403, 403, 403, 403, 320, 403, 403, 403, 403, 403]}

 96%|█████████▌| 959997/1000000 [10:56:04<20:17, 32.86it/s]global step 960000, trans_decision ep_re 257.599445668421

{"global_step": 960000, "eval_re": [257.59944566842097, 257.59944566842097, 
257.59944566842097, 257.59944566842097, 257.59944566842097, 257.59944566842097, 
257.59944566842097, 257.59944566842097, 257.59944566842097, 257.59944566842097],
"eval_len": [224, 224, 224, 224, 224, 224, 224, 224, 224, 224]}

 97%|█████████▋| 969996/1000000 [11:03:00<15:34, 32.12it/s]global step 970000, trans_decision ep_re 4464.292553280613

{"global_step": 970000, "eval_re": [4663.139007539943, 4663.139007539943, 
4663.139007539943, 4663.139007539943, 4663.139007539943, 4663.139007539943, 
4663.139007539943, 4663.139007539943, 4663.139007539943, 2674.6744649466464], 
"eval_len": [958, 958, 958, 958, 958, 958, 958, 958, 958, 584]}

 98%|█████████▊| 979997/1000000 [11:10:10<10:24, 32.03it/s]global step 980000, trans_decision ep_re 3002.8283377688267

{"global_step": 980000, "eval_re": [3002.8283377688263, 3002.8283377688263, 
3002.8283377688263, 3002.8283377688263, 3002.8283377688263, 3002.8283377688263, 
3002.8283377688263, 3002.8283377688263, 3002.8283377688263, 3002.8283377688263],
"eval_len": [637, 637, 637, 637, 637, 637, 637, 637, 637, 637]}

 99%|█████████▉| 989997/1000000 [11:17:10<05:05, 32.79it/s]global step 990000, trans_decision ep_re 5066.552189099045

{"global_step": 990000, "eval_re": [5262.451433683473, 5262.451433683473, 
3303.4589878391957, 5262.451433683473, 5262.451433683473, 5262.451433683473, 
5262.451433683473, 5262.451433683473, 5262.451433683473, 5262.451433683473], 
"eval_len": [1000, 1000, 724, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999997/1000000 [11:24:10<00:00, 32.34it/s]global step 1000000, trans_decision ep_re 5172.4943379028

{"global_step": 1000000, "eval_re": [5172.494337902801, 5172.494337902801, 
5172.494337902801, 5172.494337902801, 5172.494337902801, 5172.494337902801, 
5172.494337902801, 5172.494337902801, 5172.494337902801, 5172.494337902801], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [11:24:37<00:00, 24.34it/s]
