
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.2
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9997/1000000 [04:12<10:25:46, 26.37it/s]global step 10000, trans_decision ep_re 43.207732241227355

{"global_step": 10000, "eval_re": [3.4096768363367156, 73.79198898993928, 
11.219146538499654, 1.198121010055588, 14.458236318673247, 57.111369974474464, 
227.29217856512935, 35.4378097410669, 4.678139599287476, 3.480654838810884], 
"eval_len": [19, 113, 22, 11, 29, 120, 123, 141, 15, 21]}

  2%|▏         | 19999/1000000 [12:50<10:23:16, 26.21it/s]global step 20000, trans_decision ep_re 5.757716492594268

{"global_step": 20000, "eval_re": [6.869619763430696, 5.8275075323932795, 
1.618195933387764, 1.607165354135981, 12.248324569224168, 4.476324259406816, 
3.964226902916816, 13.61461883895399, 4.272727560381341, 3.0784542117118194], 
"eval_len": [17, 21, 16, 21, 31, 96, 16, 25, 19, 21]}

  3%|▎         | 29999/1000000 [21:20<10:20:39, 26.05it/s]global step 30000, trans_decision ep_re 68.54306066776111

{"global_step": 30000, "eval_re": [3.91310410252966, 31.16049423434578, 
3.912507168695176, 2.6713512774030415, 307.07208442529367, 6.5703132669581334, 
3.7655745411802815, 10.52033761512189, 12.815289240684312, 303.02955080539914], 
"eval_len": [19, 47, 39, 19, 257, 37, 25, 28, 25, 208]}

  4%|▍         | 39999/1000000 [29:50<10:15:41, 25.99it/s]global step 40000, trans_decision ep_re 26.87430280878827

{"global_step": 40000, "eval_re": [2.8067683532705865, 6.834794957675623, 
4.827197220236685, 5.363935634983293, 223.80060719518488, 4.024754899499507, 
6.655960891421104, 5.403867364219167, 4.92384974294029, 4.101291828451617], 
"eval_len": [17, 18, 19, 19, 205, 18, 18, 18, 19, 21]}

  5%|▍         | 49999/1000000 [38:20<10:13:09, 25.82it/s]global step 50000, trans_decision ep_re 6.778972959812933

{"global_step": 50000, "eval_re": [8.327524189152202, 9.831064384352967, 
4.3281746275981225, 1.3397323762985345, 5.166679947615668, 10.147938239941658, 
9.502150220843887, 12.111345612598349, 4.609940469830873, 2.42517952989706], 
"eval_len": [23, 25, 19, 19, 24, 24, 42, 26, 14, 13]}

  6%|▌         | 59999/1000000 [46:34<9:59:25, 26.14it/s]global step 60000, trans_decision ep_re 14.016730314710259

{"global_step": 60000, "eval_re": [22.642563145688754, 8.689557072380373, 
3.3409424483166794, -0.0017247457622625167, 18.04429995612835, 
12.920368430000261, 5.192331877964011, 44.696110789742235, 15.78318091691417, 
8.859673255730026], "eval_len": [40, 18, 38, 35, 35, 29, 16, 48, 27, 22]}

  7%|▋         | 69998/1000000 [55:03<9:44:56, 26.50it/s]global step 70000, trans_decision ep_re 18.793360969211943

{"global_step": 70000, "eval_re": [92.81306503069284, 8.12729558512992, 
7.567196578923882, 5.754489940576067, 5.509613772697474, 1.2397106357882641, 
7.215063498315706, 3.9892861478466664, 44.747165252428516, 10.970723249720097], 
"eval_len": [103, 28, 17, 22, 34, 21, 29, 15, 77, 35]}

  8%|▊         | 79997/1000000 [1:03:31<9:47:19, 26.11it/s]global step 80000, trans_decision ep_re 8.604997331130686

{"global_step": 80000, "eval_re": [4.967916747001041, 6.707322131296143, 
16.106381266740453, 12.955689235361506, 7.028783276531925, 7.489071910269167, 
7.14079571953425, 7.799906833023877, 3.075290484433653, 12.778815707114855], 
"eval_len": [29, 19, 41, 28, 23, 27, 22, 20, 17, 26]}

  9%|▉         | 89999/1000000 [1:12:04<9:46:26, 25.86it/s]global step 90000, trans_decision ep_re 12.038520841099556

{"global_step": 90000, "eval_re": [1.7165904222246096, 9.362477473215426, 
5.71119487310848, 1.9843662076149895, 2.7119893329495905, 86.96934099654219, 
5.220263414908226, 3.8035963774795474, -0.20501295080949472, 
3.1104022637620115], "eval_len": [31, 30, 35, 15, 26, 113, 40, 27, 18, 27]}

 10%|▉         | 99999/1000000 [1:20:50<9:51:15, 25.37it/s]global step 100000, trans_decision ep_re 4.696298012117166

{"global_step": 100000, "eval_re": [2.5751273771676852, 2.7850971776565725, 
2.760126112889108, 0.2486937838594299, 3.846706744716011, 9.928003050485872, 
-0.03141920491749345, 7.082750462569748, 8.785599770998868, 8.982294845745852], 
"eval_len": [16, 19, 18, 11, 24, 28, 15, 19, 18, 25]}

 11%|█         | 109999/1000000 [1:29:20<9:45:26, 25.34it/s]global step 110000, trans_decision ep_re 28.787430470024162

{"global_step": 110000, "eval_re": [133.9781384449719, 5.236265067968555, 
3.7276984639023834, 14.3151702860959, 3.877090477019021, 3.7842167807008797, 
6.40893587132226, 0.665376913380932, 94.72666720392492, 21.154745190954827], 
"eval_len": [114, 27, 14, 28, 14, 18, 23, 14, 108, 30]}

 12%|█▏        | 119998/1000000 [1:38:04<9:35:46, 25.47it/s]global step 120000, trans_decision ep_re 5.755860239989188

{"global_step": 120000, "eval_re": [10.420177394340406, 10.808665459177549, 
2.728207928959554, 1.3545825688210802, 2.725421789762365, 6.4505576602502455, 
0.23627410788994962, 9.270084554045841, 13.080983595827059, 0.4836473408178339],
"eval_len": [21, 22, 15, 28, 26, 17, 12, 18, 29, 16]}

 13%|█▎        | 129997/1000000 [1:46:44<9:33:18, 25.29it/s]global step 130000, trans_decision ep_re 38.06929293807719

{"global_step": 130000, "eval_re": [9.6756451668884, 4.94533592063909, 
6.2457905751530545, 6.676917231889011, 13.807902486842819, 9.676699976101743, 
286.1588969122214, 36.4240744076817, 3.0276669059352206, 4.0539997974193875], 
"eval_len": [31, 15, 23, 20, 24, 22, 207, 58, 26, 15]}

 14%|█▍        | 139997/1000000 [1:55:25<9:33:57, 24.97it/s]global step 140000, trans_decision ep_re 34.49502159501881

{"global_step": 140000, "eval_re": [1.951219307403254, 9.061979270970692, 
3.077404155005385, 11.319797597533565, 1.787483353753627, 1.2107251960953416, 
7.445535874620829, 14.467984449203804, 294.9557427279797, -0.32765598237807453],
"eval_len": [20, 31, 16, 26, 21, 15, 19, 28, 209, 21]}

 15%|█▍        | 149999/1000000 [2:04:20<9:23:21, 25.15it/s]global step 150000, trans_decision ep_re 36.75833733344935

{"global_step": 150000, "eval_re": [293.557475693351, 7.852695128632709, 
9.394967333148285, 7.905466953664084, 8.34398049473926, 11.603950361445033, 
12.420635162310566, 3.6918771863904376, 12.90521624894195, 
-0.09289122812985645], "eval_len": [177, 24, 22, 27, 20, 28, 27, 22, 27, 22]}

 16%|█▌        | 159999/1000000 [2:13:00<9:12:21, 25.35it/s]global step 160000, trans_decision ep_re 15.957673893299477

{"global_step": 160000, "eval_re": [3.5807930238477486, 132.10294084419942, 
5.167362697235674, 4.277681134551803, 6.35968785737571, -5.059405402341557, 
4.634469012233262, 0.5881950788889008, 1.241905312494311, 6.683109374509517], 
"eval_len": [15, 114, 26, 16, 23, 18, 16, 12, 21, 18]}

 17%|█▋        | 169999/1000000 [2:21:31<9:05:53, 25.34it/s]global step 170000, trans_decision ep_re 6.548190323218277

{"global_step": 170000, "eval_re": [-2.8253849538363855, 11.16037929924923, 
6.183377103484931, 3.7584453392086634, 5.269559994114818, 14.755471366188619, 
7.285771514743001, 2.3958605426980375, 19.313660896928646, -1.8152378705967862],
"eval_len": [23, 26, 23, 30, 23, 28, 19, 26, 41, 14]}

 18%|█▊        | 179998/1000000 [2:30:12<8:55:32, 25.52it/s]global step 180000, trans_decision ep_re 58.44690658387992

{"global_step": 180000, "eval_re": [2.6501206287804715, 0.8839278528212382, 
77.80295326162675, 10.206785653045538, 6.434825108239282, 435.87470564245723, 
2.73111807329728, 38.21446823686215, 4.250108191566975, 5.420053190102342], 
"eval_len": [22, 24, 110, 22, 35, 259, 14, 81, 16, 18]}

 19%|█▉        | 189998/1000000 [2:38:52<8:45:55, 25.67it/s]global step 190000, trans_decision ep_re 46.80865574537715

{"global_step": 190000, "eval_re": [8.971780781707665, 7.505501143563765, 
9.378746566416384, 2.3136465577112286, 15.953522779942809, 5.795213226170171, 
207.53470076029575, 16.923917399773767, 191.86744180772374, 1.8420864304662576],
"eval_len": [23, 17, 28, 16, 29, 19, 168, 42, 133, 21]}

 20%|█▉        | 199998/1000000 [2:47:32<8:41:57, 25.55it/s]global step 200000, trans_decision ep_re 132.4077396535108

{"global_step": 200000, "eval_re": [0.09255603070292417, 220.92426986776331, 
440.97444521748116, 10.687169085815658, 7.286451335459981, 621.6868457794835, 
9.751085533893141, 7.425404038335082, 3.609220785254566, 1.6399488609186923], 
"eval_len": [13, 140, 170, 26, 30, 258, 28, 26, 15, 29]}

 21%|██        | 209997/1000000 [2:56:30<8:46:55, 24.99it/s]global step 210000, trans_decision ep_re 79.53501690263153

{"global_step": 210000, "eval_re": [2.7847604008372087, 15.161143726679974, 
331.6198570672988, 208.9992193137292, 3.6453757475847217, -0.842583212931711, 
4.137494591478178, 11.6805678514734, 213.6576228391878, 4.50671070097763], 
"eval_len": [18, 31, 184, 165, 15, 19, 27, 22, 153, 24]}

 22%|██▏       | 219999/1000000 [3:05:01<8:38:00, 25.10it/s]global step 220000, trans_decision ep_re 9.01685304588053

{"global_step": 220000, "eval_re": [1.2133419915050276, 3.074208891163942, 
36.75123403522152, 9.756734239042068, 5.65050067653922, 2.4415573870753193, 
11.372007358874187, 9.135791800866293, 5.131192699307621, 5.641961379210119], 
"eval_len": [27, 17, 57, 23, 29, 16, 26, 23, 17, 18]}

 23%|██▎       | 229999/1000000 [3:13:41<8:24:28, 25.44it/s]global step 230000, trans_decision ep_re 83.53112431603618

{"global_step": 230000, "eval_re": [39.92331885719505, -0.03243245336736178, 
8.842540635098555, 3.0199239038060472, 6.121816299933959, 6.488194222818571, 
634.4888196677708, 121.115084663741, 3.944626105733285, 11.399351257632073], 
"eval_len": [50, 26, 19, 14, 25, 28, 347, 95, 19, 39]}

 24%|██▍       | 239999/1000000 [3:22:20<8:12:21, 25.73it/s]global step 240000, trans_decision ep_re 125.04618356759909

{"global_step": 240000, "eval_re": [2.825891183554682, 4.649674823507022, 
920.622792666848, 8.490435403236193, 280.2507892691877, 16.073060660605368, 
6.49691505361267, 0.4655112450487626, 5.783550014653483, 4.803215355736905], 
"eval_len": [18, 34, 558, 21, 148, 25, 26, 22, 35, 23]}

 25%|██▍       | 249999/1000000 [3:31:01<8:17:54, 25.11it/s]global step 250000, trans_decision ep_re 4.265954552907546

{"global_step": 250000, "eval_re": [-2.111566087192578, 5.583729378474615, 
1.7486955751206468, 9.927022282022996, 2.1262968773505992, 4.740216659845697, 
2.430351447188768, 5.1784888721201545, 7.766333686130879, 5.26997683801369], 
"eval_len": [14, 32, 13, 26, 15, 27, 15, 15, 36, 22]}

 26%|██▌       | 259998/1000000 [3:39:45<8:01:49, 25.60it/s]global step 260000, trans_decision ep_re 21.48091582568378

{"global_step": 260000, "eval_re": [16.729580381455683, 3.960561838804963, 
92.88750446372245, 9.074561629743211, 59.541697397346276, 10.99112883421363, 
1.3654093383549188, 7.168669082743433, 12.110498548804905, 0.9795467416483252], 
"eval_len": [31, 22, 151, 27, 61, 36, 25, 20, 24, 16]}

 27%|██▋       | 269999/1000000 [3:48:40<7:57:42, 25.47it/s]global step 270000, trans_decision ep_re 15.946525856498102

{"global_step": 270000, "eval_re": [11.475672401221477, 18.97404952420258, 
14.3566793401464, 74.4932839871051, -1.2798901890322472, 3.2543224783099243, 
1.7591529191326376, 11.388385034646728, 13.037884742250831, 12.005718326997577],
"eval_len": [25, 45, 33, 70, 27, 18, 18, 23, 32, 24]}

 28%|██▊       | 279999/1000000 [3:57:20<7:49:07, 25.58it/s]global step 280000, trans_decision ep_re 8.588047848392934

{"global_step": 280000, "eval_re": [13.707499773268776, 3.1648889485561664, 
11.355637789895734, 0.7457377198783609, 19.131754059646283, 6.899586070298345, 
4.445977840785092, 17.74984832755562, 6.080574435702868, 2.5989735183420892], 
"eval_len": [28, 17, 38, 17, 43, 29, 22, 28, 27, 15]}

 29%|██▉       | 289997/1000000 [4:05:45<7:44:49, 25.46it/s]global step 290000, trans_decision ep_re 5.613243843130409

{"global_step": 290000, "eval_re": [14.89091797598938, 2.5466693142867753, 
15.724760576728556, 0.693463119747029, 2.0788187331031325, 0.9617361165091469, 
14.36319554686045, 5.517150084931472, 0.3053455369839392, -0.9496185738357994], 
"eval_len": [36, 19, 39, 18, 20, 16, 23, 30, 19, 28]}

 30%|██▉       | 299997/1000000 [4:14:40<7:38:19, 25.45it/s]global step 300000, trans_decision ep_re 86.21175052956126

{"global_step": 300000, "eval_re": [5.935372511255923, 59.482904283203894, 
12.495615615060021, 761.7743224782433, 5.396652291438388, 5.164643499711207, 
11.753696924074761, 6.064236496466206, 0.27347344915096883, -6.223412252992284],
"eval_len": [18, 49, 24, 467, 18, 34, 27, 17, 23, 21]}

 31%|███       | 309997/1000000 [4:23:20<7:33:30, 25.36it/s]global step 310000, trans_decision ep_re 56.09322951765372

{"global_step": 310000, "eval_re": [9.711177564113344, 112.73856902876459, 
0.11470448297481055, 1.3725630842650973, 405.7766190524141, 27.184203698164787, 
4.034937351672697, -0.6287909255674798, -0.4832399355680549, 
1.1115517753032416], "eval_len": [22, 118, 19, 25, 211, 44, 23, 22, 12, 28]}

 32%|███▏      | 319999/1000000 [4:32:00<7:29:00, 25.24it/s]global step 320000, trans_decision ep_re 44.932501657025945

{"global_step": 320000, "eval_re": [140.62341826823865, 1.4394187962073006, 
144.97549942145966, 2.207070562660778, 14.588768073895176, 5.922883088727437, 
4.68807165385145, 8.741242415624784, 12.042330025742388, 114.09631426385181], 
"eval_len": [99, 12, 89, 14, 24, 23, 27, 24, 42, 85]}

 33%|███▎      | 329998/1000000 [4:40:40<7:13:50, 25.74it/s]global step 330000, trans_decision ep_re 33.5844898282789

{"global_step": 330000, "eval_re": [37.094565628335054, 9.422588267507933, 
3.2184474790745, 10.29349544964269, 6.454251249123331, 248.60179282784944, 
3.707429771972302, 7.948575710285402, 2.5420636188963006, 6.561688280102015], 
"eval_len": [97, 35, 18, 24, 22, 145, 21, 29, 14, 17]}

 34%|███▍      | 339998/1000000 [4:49:20<7:07:46, 25.71it/s]global step 340000, trans_decision ep_re 54.124720547645495

{"global_step": 340000, "eval_re": [2.9349661854991482, 0.20748669144588122, 
2.218024015292201, 244.5008217110447, 1.582006810758231, 10.965903158946738, 
8.286113375018658, 9.04692395931332, 252.0761631341738, 9.42879643496219], 
"eval_len": [20, 15, 17, 171, 12, 25, 22, 19, 204, 25]}

 35%|███▍      | 349999/1000000 [4:57:50<7:12:38, 25.04it/s]global step 350000, trans_decision ep_re 19.812867082133483

{"global_step": 350000, "eval_re": [27.774648882077386, -0.2923725210163753, 
8.689080666825554, 5.061128900662689, -1.7877077892299718, 127.22145509370867, 
1.0732442225126508, 8.476752488971657, 20.24470709114679, 1.6677337856757934], 
"eval_len": [107, 13, 39, 20, 12, 108, 19, 26, 42, 16]}

 36%|███▌      | 359999/1000000 [5:06:30<6:59:20, 25.44it/s]global step 360000, trans_decision ep_re 76.89667422983786

{"global_step": 360000, "eval_re": [8.535987219666115, 2.322594689288688, 
2.9298252412452865, 1.2160111897997634, 15.221709811366907, 8.370217008459866, 
6.617969818963102, 500.9285270890933, 20.085196999520655, 202.73870323097495], 
"eval_len": [23, 17, 18, 13, 23, 24, 17, 181, 37, 125]}

 37%|███▋      | 369998/1000000 [5:15:12<6:44:52, 25.93it/s]global step 370000, trans_decision ep_re 5.946178897795057

{"global_step": 370000, "eval_re": [5.6324076875684845, 4.831529015390789, 
-0.12105583299306685, 1.0302227548088165, 4.423385229108311, 10.380833178629373,
5.41423648890017, 27.651357908852958, -1.9312243051856206, 2.150096852870357], 
"eval_len": [21, 26, 12, 18, 41, 23, 20, 47, 20, 22]}

 38%|███▊      | 379998/1000000 [5:23:52<6:40:21, 25.81it/s]global step 380000, trans_decision ep_re 38.55543484011528

{"global_step": 380000, "eval_re": [2.988684630808968, 14.197135672284691, 
13.333060720972158, 345.2058250873008, -0.023920950470064394, 
-1.1441529459380748, -1.2289667771135238, 0.933125359561855, 10.105183297830333,
1.188374305915733], "eval_len": [19, 26, 21, 184, 9, 24, 26, 16, 29, 19]}

 39%|███▉      | 389999/1000000 [5:32:40<6:36:15, 25.66it/s]global step 390000, trans_decision ep_re 31.294535721966135

{"global_step": 390000, "eval_re": [45.651148455358886, 133.3147399872655, 
7.390004190277393, 4.917395214408357, 8.14452844340554, 91.16707390596888, 
9.487497839553003, 3.730328443906637, 6.7541594601252175, 2.3884812793918813], 
"eval_len": [45, 204, 24, 21, 26, 148, 19, 18, 21, 12]}

 40%|███▉      | 399998/1000000 [5:41:20<6:30:38, 25.60it/s]global step 400000, trans_decision ep_re 68.11630903188176

{"global_step": 400000, "eval_re": [1.3133170276908832, 2.7513494509779384, 
50.50455031937648, 5.5695882355806035, 243.99311703805327, -0.3447170737959532, 
9.887655802193436, 308.9380197139396, 4.538506476012723, 54.011703328788755], 
"eval_len": [17, 26, 68, 16, 152, 28, 25, 164, 16, 54]}

 41%|████      | 409998/1000000 [5:50:00<6:18:43, 25.96it/s]global step 410000, trans_decision ep_re 12.85291284956713

{"global_step": 410000, "eval_re": [2.903900929352866, 5.609349068960012, 
2.3277026318241525, 7.521209347615384, 5.548867315239204, 5.384653752652238, 
87.64003947189303, 2.0951084899562753, 1.2619472622294619, 8.236350225948694], 
"eval_len": [29, 21, 14, 16, 16, 20, 105, 16, 18, 22]}

 42%|████▏     | 419999/1000000 [5:58:25<6:18:11, 25.56it/s]global step 420000, trans_decision ep_re 54.43818062321642

{"global_step": 420000, "eval_re": [-1.2501176220528327, 8.787778956784946, 
13.025341884411947, 1.5512730062911197, -2.4307980997391057, 6.955733080317032, 
6.698021324989498, 3.8710425997045452, 508.8118114175489, -1.6382803160918593], 
"eval_len": [11, 29, 25, 14, 18, 20, 21, 17, 221, 12]}

 43%|████▎     | 429998/1000000 [6:07:03<6:06:20, 25.93it/s]global step 430000, trans_decision ep_re 40.28391825611348

{"global_step": 430000, "eval_re": [-3.014082607033513, 2.7779162693478554, 
-0.9090509314578368, 0.16157675880347522, 11.662051901137767, 
180.04018759213477, -1.515767112932698, 9.892528605334789, 0.7729701853666633, 
202.97085190043353], "eval_len": [13, 18, 22, 13, 26, 128, 20, 22, 13, 207]}

 44%|████▍     | 439999/1000000 [6:15:41<6:03:41, 25.66it/s]global step 440000, trans_decision ep_re 114.27154915631908

{"global_step": 440000, "eval_re": [9.760430053899793, 929.8152973826705, 
1.0464155172585734, 7.646212472764217, 7.066150852341391, 170.15279631966743, 
2.9579998583932507, 4.071160277309598, 8.783140303309377, 1.4158885255767584], 
"eval_len": [25, 467, 12, 18, 20, 157, 21, 18, 26, 12]}

 45%|████▍     | 449999/1000000 [6:24:20<5:55:00, 25.82it/s]global step 450000, trans_decision ep_re 76.12780918700973

{"global_step": 450000, "eval_re": [7.429484394952527, 3.322314053754785, 
618.8566402761054, 10.525345926195708, 2.827733238021708, 93.40311057186264, 
4.019612738258323, 6.740545046486769, 13.159146126126839, 0.9941594983326502], 
"eval_len": [25, 24, 222, 30, 15, 114, 17, 25, 28, 12]}

 46%|████▌     | 459998/1000000 [6:33:10<5:46:29, 25.97it/s]global step 460000, trans_decision ep_re 40.15501579642468

{"global_step": 460000, "eval_re": [0.35401648810921266, 131.1096060494606, 
82.62069850560837, -1.7720168841457749, 7.336665249050408, 1.456849457326959, 
169.75507980761796, 3.081703231039825, 3.430870748157224, 4.176685312021951], 
"eval_len": [12, 121, 86, 12, 22, 25, 111, 14, 13, 17]}

 47%|████▋     | 469997/1000000 [6:41:33<5:41:31, 25.86it/s]global step 470000, trans_decision ep_re 55.60100596236104

{"global_step": 470000, "eval_re": [189.55926871704392, 325.6578741235729, 
-0.21997415192802489, 12.572855733581102, 8.194283890323108, 5.319642581387612, 
10.587952624059136, 1.481789492807751, 2.802942124806066, 0.053424487956828914],
"eval_len": [110, 296, 25, 26, 18, 17, 26, 29, 29, 15]}

 48%|████▊     | 479999/1000000 [6:50:20<5:36:18, 25.77it/s]global step 480000, trans_decision ep_re 21.856532254697193

{"global_step": 480000, "eval_re": [188.05467634200335, 6.239851639132633, 
2.7351562196919934, 1.3013736625980343, 1.521991429632226, 7.387661875811565, 
4.005421246306923, 3.5677281126424916, 2.1532708004453354, 1.5981912187073832], 
"eval_len": [96, 28, 19, 12, 27, 21, 19, 19, 15, 22]}

 49%|████▉     | 489998/1000000 [6:59:00<5:29:47, 25.77it/s]global step 490000, trans_decision ep_re 175.62139414374136

{"global_step": 490000, "eval_re": [11.797801178850927, 3.9595865556456236, 
17.087063168592923, 8.870361665132716, 767.0568716196999, 10.02161476239055, 
17.070481696216174, 103.96111159654053, 811.4360538304708, 4.952995363873131], 
"eval_len": [64, 19, 26, 24, 324, 19, 43, 100, 343, 19]}

 50%|████▉     | 499998/1000000 [7:07:40<5:19:03, 26.12it/s]global step 500000, trans_decision ep_re 61.05736437410193

{"global_step": 500000, "eval_re": [6.068067640320022, 5.011399158273813, 
7.834602831416215, 8.8514117520406, 4.255523681289793, 6.779977540006232, 
141.2221718844157, 313.2397639248086, -3.1927336534374313, 120.5034589818857], 
"eval_len": [23, 15, 30, 26, 22, 22, 81, 176, 9, 97]}

 51%|█████     | 509998/1000000 [7:16:02<5:16:31, 25.80it/s]global step 510000, trans_decision ep_re 10.775181850754219

{"global_step": 510000, "eval_re": [-1.2695518698482433, 10.036302108155386, 
8.539482044881545, 0.9763875036490103, 1.9033770283249498, 18.01415799232482, 
3.5721953981527013, 6.014830617067017, 26.970190558610675, 32.994447126224344], 
"eval_len": [16, 22, 25, 21, 26, 26, 17, 28, 40, 48]}

 52%|█████▏    | 519997/1000000 [7:24:50<5:08:56, 25.90it/s]global step 520000, trans_decision ep_re 25.683921821691648

{"global_step": 520000, "eval_re": [147.5064583658878, 2.3661265790581245, 
4.565237282204039, 12.544804254378377, 4.940238668986811, 1.474478026495502, 
3.0202068394898034, 5.887933351089102, 0.8885557018825753, 73.6451791474444], 
"eval_len": [149, 23, 17, 26, 22, 18, 26, 18, 19, 108]}

 53%|█████▎    | 529997/1000000 [7:33:14<5:08:18, 25.41it/s]global step 530000, trans_decision ep_re 44.66969438576755

{"global_step": 530000, "eval_re": [8.852416039673072, 0.40105887987255784, 
4.808665344036936, 283.1468790159677, 25.541787557599584, 6.063657779056826, 
5.202234983278474, 9.182638535103424, 4.190035568424575, 99.30757015466236], 
"eval_len": [24, 11, 16, 224, 44, 32, 17, 22, 14, 78]}

 54%|█████▍    | 539999/1000000 [7:42:00<4:58:58, 25.64it/s]global step 540000, trans_decision ep_re 26.440749782434835

{"global_step": 540000, "eval_re": [34.100288196836104, 1.9761373918643053, 
6.739171904731933, 15.810102103717247, 17.60673041279897, 15.409389518382625, 
3.7482839466387885, 147.06107604483552, 8.878022665464318, 13.078295639078487], 
"eval_len": [64, 26, 25, 27, 29, 45, 18, 180, 22, 28]}

 55%|█████▍    | 549997/1000000 [7:50:23<4:52:01, 25.68it/s]global step 550000, trans_decision ep_re 87.93033328961347

{"global_step": 550000, "eval_re": [183.3116270796249, -2.006654591259633, 
328.700169538466, 4.08860638490241, 346.68735584254256, 9.054938339012857, 
3.879288520135687, 2.192924421250216, 4.083240451317012, -0.6881630898572169], 
"eval_len": [156, 27, 211, 19, 184, 21, 14, 18, 30, 14]}

 56%|█████▌    | 559999/1000000 [7:59:10<4:45:00, 25.73it/s]global step 560000, trans_decision ep_re 43.8362683621538

{"global_step": 560000, "eval_re": [5.389991760352677, 11.802883691063947, 
52.75718651845255, 11.259832773198946, 336.7999236989088, 3.6769741461958607, 
0.1550467465122733, 3.536794331842551, 2.4492056247119014, 10.534844330298517], 
"eval_len": [16, 24, 120, 20, 150, 21, 13, 26, 13, 21]}

 57%|█████▋    | 569999/1000000 [8:07:31<4:37:31, 25.82it/s]global step 570000, trans_decision ep_re 6.782867516090008

{"global_step": 570000, "eval_re": [2.193679751438302, 4.937452502248604, 
9.895101031692302, 10.658265640093065, 4.847963187002878, 5.279294120602279, 
0.2749919537625665, 1.430550805725292, 14.901168566417894, 13.410207601916891], 
"eval_len": [16, 14, 25, 29, 17, 28, 16, 27, 39, 29]}

 58%|█████▊    | 579999/1000000 [8:16:04<4:33:39, 25.58it/s]global step 580000, trans_decision ep_re 64.14776024930255

{"global_step": 580000, "eval_re": [6.332384482797714, 4.430489769823016, 
153.20398714114162, 177.62739644324165, 5.1107841177500495, 11.564002616901888, 
0.4486502120707474, 3.072610575824223, 273.2118366009919, 6.47546053248276], 
"eval_len": [21, 21, 113, 97, 33, 34, 22, 13, 128, 15]}

 59%|█████▉    | 589999/1000000 [8:24:50<4:25:26, 25.74it/s]global step 590000, trans_decision ep_re 186.01563369557616

{"global_step": 590000, "eval_re": [150.1729896819195, 7.21212904776403, 
7.7093710180572295, 3.828372189263161, 2.064587754640495, 7.965078237540807, 
92.98406141995488, 4.644492173307501, 837.7045957330283, 745.8706597002856], 
"eval_len": [231, 20, 24, 18, 17, 19, 78, 27, 316, 386]}

 60%|█████▉    | 599997/1000000 [8:33:13<4:19:39, 25.67it/s]global step 600000, trans_decision ep_re 46.76274035353605

{"global_step": 600000, "eval_re": [101.27542351101211, 3.903577988334151, 
125.7878848564066, 192.37278466385553, 1.0266168427193363, 11.12611478469544, 
16.15339783551926, 5.990659710720566, 8.974554319415157, 1.0163890226823904], 
"eval_len": [91, 13, 107, 153, 16, 23, 41, 18, 24, 17]}

 61%|██████    | 609997/1000000 [8:42:00<4:11:31, 25.84it/s]global step 610000, trans_decision ep_re 4.186927304071633

{"global_step": 610000, "eval_re": [3.5218635349705236, 3.4598641742086094, 
5.8232690400265055, 5.4984665170374, -1.2883441918964433, 2.9146501950920234, 
0.9838658159863587, 10.616680437247977, 1.7667676577311278, 8.572189860312244], 
"eval_len": [28, 15, 28, 28, 13, 21, 13, 27, 13, 27]}

 62%|██████▏   | 619999/1000000 [8:50:20<4:07:19, 25.61it/s]global step 620000, trans_decision ep_re 49.74426843975265

{"global_step": 620000, "eval_re": [8.946501540232136, 3.461693372314104, 
3.728709107256425, 4.812889873617805, 8.675657502497174, 1.2570721254822348, 
8.864049874766202, 9.57068263045962, 2.7960906070756053, 445.3293377638252], 
"eval_len": [21, 27, 21, 23, 21, 27, 21, 18, 20, 211]}

 63%|██████▎   | 629999/1000000 [8:58:55<3:59:05, 25.79it/s]global step 630000, trans_decision ep_re 65.80634673190188

{"global_step": 630000, "eval_re": [352.4348404598512, 3.4501911011792745, 
5.285678428783802, 236.69435253395724, 13.284730319445277, 14.61969889270597, 
1.4443914666605298, 2.4517363620897057, 20.977970837296134, 7.419876917049842], 
"eval_len": [185, 35, 15, 125, 30, 28, 20, 24, 34, 19]}

 64%|██████▍   | 639999/1000000 [9:07:40<3:53:41, 25.68it/s]global step 640000, trans_decision ep_re 30.151217871883993

{"global_step": 640000, "eval_re": [7.91346739207317, 2.1939919965679238, 
193.67161232942397, 12.553121211994945, 11.334446741176404, 12.673757878263759, 
34.154894755880896, 15.427499383530389, 0.4069789084899385, 11.18240812143852], 
"eval_len": [19, 25, 315, 27, 29, 26, 148, 28, 20, 26]}

 65%|██████▍   | 649998/1000000 [9:16:02<3:42:34, 26.21it/s]global step 650000, trans_decision ep_re 49.67880823414673

{"global_step": 650000, "eval_re": [3.7733449552683265, 4.852810078583656, 
178.86600263446707, 13.72169446011605, 3.53220903491981, 270.65454173462365, 
7.800806176735041, 7.190155487399095, 1.983826822884195, 4.412690956470416], 
"eval_len": [17, 19, 109, 26, 28, 137, 22, 27, 18, 17]}

 66%|██████▌   | 659998/1000000 [9:24:36<3:38:18, 25.96it/s]global step 660000, trans_decision ep_re 28.554464751290038

{"global_step": 660000, "eval_re": [5.842767281160695, 6.421296634569526, 
2.665406181079465, 255.02691001361433, -1.8225256616337897, 1.7628913632321717, 
1.0051119371185144, 3.653320249911175, 7.600785393855751, 3.3886841199925746], 
"eval_len": [19, 24, 15, 176, 25, 15, 21, 16, 23, 14]}

 67%|██████▋   | 669999/1000000 [9:33:11<3:33:02, 25.82it/s]global step 670000, trans_decision ep_re 7.105241145505213

{"global_step": 670000, "eval_re": [2.105859154008735, -0.2752774532588409, 
13.389009420319379, -1.1766330659853197, 4.316417049939769, 8.327935286203058, 
12.30704581589694, 2.307161238491091, 26.065838384573706, 3.6850556248636055], 
"eval_len": [17, 15, 26, 10, 18, 20, 30, 14, 55, 14]}

 68%|██████▊   | 679999/1000000 [9:41:44<3:25:30, 25.95it/s]global step 680000, trans_decision ep_re 43.39489019855743

{"global_step": 680000, "eval_re": [6.464204621856198, 10.18803594308904, 
3.4998393407639945, 185.67200998493058, 3.391442565532067, 5.713287135592079, 
5.119205193967017, 8.885867875348527, 171.248724935023, 33.76628438947172], 
"eval_len": [25, 21, 21, 96, 17, 23, 19, 24, 112, 95]}

 69%|██████▉   | 689999/1000000 [9:50:30<3:21:57, 25.58it/s]global step 690000, trans_decision ep_re 51.91865811470441

{"global_step": 690000, "eval_re": [8.67532961464298, 4.938121166404963, 
368.3124090462144, 5.635886596863702, 12.174801074576626, 94.28332154349361, 
8.320293635230051, 0.19814496899456285, 5.77389312462604, 10.87438037599717], 
"eval_len": [28, 20, 220, 24, 30, 125, 29, 26, 16, 20]}

 70%|██████▉   | 699998/1000000 [9:58:52<3:10:18, 26.27it/s]global step 700000, trans_decision ep_re 108.11118516510206

{"global_step": 700000, "eval_re": [-0.3241734016814387, 1.5630613645793972, 
1.1650707270840157, 331.7801999985529, 13.25195749691804, 350.5786188130265, 
0.40255417468053356, 301.28706214356765, 8.410925499889938, 72.99657483440312], 
"eval_len": [11, 15, 13, 241, 37, 170, 17, 159, 23, 210]}

 71%|███████   | 709997/1000000 [10:07:40<3:07:33, 25.77it/s]global step 710000, trans_decision ep_re 6.18501002858407

{"global_step": 710000, "eval_re": [5.1108488448126215, 6.126957813830729, 
6.5504040819309735, 5.039334915694199, 5.777842344326108, 3.5798040208604074, 
3.1813440979859986, 13.774444055753197, 12.513137312074672, 0.1959827985717949],
"eval_len": [26, 20, 20, 24, 21, 23, 22, 26, 30, 30]}

 72%|███████▏  | 719998/1000000 [10:16:03<3:01:10, 25.76it/s]global step 720000, trans_decision ep_re 6.4441120712649465

{"global_step": 720000, "eval_re": [6.506381359102905, 26.339869108522834, 
5.216329451274492, 1.3790334230067012, 3.3435488685296324, -0.2795114956599595, 
1.4451683049417392, -3.9278814075782043, 19.607298787750274, 4.810884312759053],
"eval_len": [19, 42, 15, 18, 14, 20, 15, 24, 40, 31]}

 73%|███████▎  | 729998/1000000 [10:24:36<2:53:23, 25.95it/s]global step 730000, trans_decision ep_re 56.04567054175861

{"global_step": 730000, "eval_re": [4.130245856905072, 90.86537085787393, 
-0.7081189761076778, 200.78310965756674, 1.929092001467379, 3.9030717739841894, 
235.75336409132223, 7.012397813818357, 11.30912935302773, 5.4790429877281435], 
"eval_len": [16, 98, 21, 134, 19, 14, 134, 16, 31, 20]}

 74%|███████▍  | 739999/1000000 [10:33:20<2:47:25, 25.88it/s]global step 740000, trans_decision ep_re 12.148522549646229

{"global_step": 740000, "eval_re": [6.408209146875653, 1.1380269783846637, 
10.476619476912292, 5.742964502233103, 6.507403294044825, 1.5504699382527065, 
3.7740825540527814, 75.60743863803603, 7.666280007381463, 2.613730960288777], 
"eval_len": [23, 17, 34, 24, 23, 21, 27, 170, 27, 13]}

 75%|███████▍  | 749997/1000000 [10:41:43<2:43:22, 25.50it/s]global step 750000, trans_decision ep_re 7.157020697462764

{"global_step": 750000, "eval_re": [3.6056735314010466, 1.2298489429860355, 
4.0908817635600965, 6.125304128920831, 8.610015067624802, 3.2422185510844588, 
17.595070506670453, 14.531837131796962, 8.550869929942856, 3.988487420640105], 
"eval_len": [18, 21, 15, 15, 26, 16, 28, 25, 19, 20]}

 76%|███████▌  | 759997/1000000 [10:50:16<2:34:13, 25.94it/s]global step 760000, trans_decision ep_re 10.936013430845403

{"global_step": 760000, "eval_re": [5.893349118635556, 4.927392167620143, 
52.14596843778288, 3.062605668654451, 13.058471600510575, 10.005635180502496, 
-2.7803792987957046, 11.882142848903555, 8.198585597242555, 2.9663629873975235],
"eval_len": [17, 23, 65, 14, 28, 35, 24, 24, 21, 17]}

 77%|███████▋  | 769999/1000000 [10:58:51<2:29:55, 25.57it/s]global step 770000, trans_decision ep_re 99.90343541072616

{"global_step": 770000, "eval_re": [0.5621374521544154, 544.168005641631, 
0.5239768676908789, 7.642133219900377, 14.409761251277798, 6.667000400566438, 
2.584272386755042, 219.77015407916534, 2.806344132272283, 199.900568675848], 
"eval_len": [12, 236, 15, 28, 28, 21, 30, 134, 15, 116]}

 78%|███████▊  | 779997/1000000 [11:07:40<2:22:16, 25.77it/s]global step 780000, trans_decision ep_re 72.96373740254622

{"global_step": 780000, "eval_re": [1.0146544296457964, 14.385979912537831, 
3.5611148644113455, 2.118476384732181, 8.150417899374375, 22.275214737059347, 
293.0696943671546, 17.254032527450917, 54.158978732074694, 313.64881017102095], 
"eval_len": [14, 29, 14, 14, 18, 43, 160, 28, 93, 188]}

 79%|███████▉  | 789997/1000000 [11:16:03<2:16:49, 25.58it/s]global step 790000, trans_decision ep_re 7.538920343692029

{"global_step": 790000, "eval_re": [20.068198758127075, 4.366151884910526, 
14.427655557919062, 5.715680626274896, 3.1547246496338457, 9.61179062794788, 
6.2989994894039745, 0.5812089611005871, 8.767505390166848, 2.3972874914355966], 
"eval_len": [43, 23, 26, 34, 24, 36, 18, 15, 19, 17]}

 80%|███████▉  | 799998/1000000 [11:24:35<2:07:48, 26.08it/s]global step 800000, trans_decision ep_re 46.098091731890925

{"global_step": 800000, "eval_re": [2.6282986178594174, 236.0268795031144, 
11.940080083800378, 1.2166371343261366, -0.6840381966563467, 195.33193623159002,
-0.09066124577773288, 4.616559440044045, 4.443122969397255, 5.5521027812116195],
"eval_len": [24, 139, 24, 19, 14, 130, 25, 27, 20, 21]}

 81%|████████  | 809999/1000000 [11:33:20<2:04:05, 25.52it/s]global step 810000, trans_decision ep_re 40.50536693956001

{"global_step": 810000, "eval_re": [0.8212046383726428, 46.47965459775524, 
11.315865171908461, 180.8191242979986, 0.22395552250499962, 10.949973663488402, 
5.745902731567601, 131.0237757250601, 7.983739086020597, 9.690473960923509], 
"eval_len": [14, 56, 19, 84, 24, 26, 14, 112, 20, 25]}

 82%|████████▏ | 819999/1000000 [11:41:41<1:55:08, 26.06it/s]global step 820000, trans_decision ep_re 81.88043258691559

{"global_step": 820000, "eval_re": [3.9475821582467208, 8.42675250521413, 
0.3692957724436982, 4.135067903604043, 8.375911286362072, 769.7120522181274, 
6.899847557226526, 10.622893775337355, 2.301936226347238, 4.012986466246566], 
"eval_len": [14, 39, 18, 17, 27, 272, 17, 26, 13, 19]}

 83%|████████▎ | 829999/1000000 [11:50:20<1:48:58, 26.00it/s]global step 830000, trans_decision ep_re 10.323644459950048

{"global_step": 830000, "eval_re": [7.269557008791786, 1.1433410671872104, 
4.341113581334254, 60.544978894137145, 5.550630886984159, 4.656852491937242, 
3.4866603945324846, 5.836195557610527, -0.09779491952306281, 
10.504909636508724], "eval_len": [21, 21, 25, 93, 21, 19, 15, 17, 16, 23]}

 84%|████████▍ | 839999/1000000 [11:58:50<1:42:03, 26.13it/s]global step 840000, trans_decision ep_re 81.28450262326089

{"global_step": 840000, "eval_re": [8.032929599808314, 1.8859351344313955, 
2.5345040717226106, 234.2106286733367, 542.1402627768122, 2.940776131531469, 
8.266626944731305, 0.32904350513233094, 6.019140297376772, 6.485179097725769], 
"eval_len": [29, 15, 29, 165, 246, 18, 22, 14, 28, 22]}

 85%|████████▍ | 849999/1000000 [12:07:06<1:36:27, 25.92it/s]global step 850000, trans_decision ep_re 123.77885736639442

{"global_step": 850000, "eval_re": [3.518211608901478, 1.608472746562856, 
589.3996864562876, 1.6565547297417966, 11.575157264565316, 4.655511447015437, 
2.8981245963915097, 8.937544480474095, -0.29896475379888204, 613.8382750878029],
"eval_len": [25, 13, 222, 25, 36, 22, 15, 28, 17, 249]}

 86%|████████▌ | 859997/1000000 [12:15:33<1:29:15, 26.14it/s]global step 860000, trans_decision ep_re 38.54392162744365

{"global_step": 860000, "eval_re": [4.510480818284437, 6.181555140931241, 
5.405276564625866, 9.584693582164453, 11.32242889863158, -0.8548232304395837, 
344.3930959660779, 0.3110100895391851, 2.367438557844726, 2.21805988677668], 
"eval_len": [23, 18, 16, 22, 22, 22, 201, 15, 13, 14]}

 87%|████████▋ | 869999/1000000 [12:24:10<1:23:02, 26.09it/s]global step 870000, trans_decision ep_re 59.765663898068865

{"global_step": 870000, "eval_re": [559.0817395965797, 0.9802825776440423, 
4.412942876599342, 9.28934341182634, 5.872159179085192, 5.983099893424886, 
5.770074296464338, 4.181240569803791, 1.090394181597207, 0.9953623976637135], 
"eval_len": [225, 15, 15, 21, 25, 19, 26, 37, 13, 15]}

 88%|████████▊ | 879999/1000000 [12:32:40<1:17:36, 25.77it/s]global step 880000, trans_decision ep_re 7.4933832191458665

{"global_step": 880000, "eval_re": [3.4219237602266843, 4.518739751788096, 
31.280776514078887, 9.162463801571214, 4.0158763130669595, 3.3021879685654145, 
2.7062933530255124, 9.820772259215424, 1.5524615573071434, 5.152336912613342], 
"eval_len": [17, 15, 45, 26, 17, 16, 28, 21, 28, 18]}

 89%|████████▉ | 889999/1000000 [12:41:10<1:10:45, 25.91it/s]global step 890000, trans_decision ep_re 47.32938737736228

{"global_step": 890000, "eval_re": [117.21270093451992, -4.405856908460798, 
2.829301890697477, 5.950306486313393, 5.347061715099568, 11.552680760538728, 
7.761139600848369, 307.10506622465886, 4.105385766181405, 15.836087303225911], 
"eval_len": [86, 21, 18, 26, 29, 29, 23, 163, 14, 27]}

 90%|████████▉ | 899999/1000000 [12:49:31<1:04:10, 25.97it/s]global step 900000, trans_decision ep_re 50.587841485523285

{"global_step": 900000, "eval_re": [-1.4335249435789068, 14.946647610595921, 
7.906672430174661, 3.496945027200125, 417.1046792228804, 7.691024660594172, 
45.89826772725781, 2.8946803957448592, 5.1367336975295945, 2.236289026834172], 
"eval_len": [26, 28, 24, 21, 196, 20, 116, 13, 25, 17]}

 91%|█████████ | 909997/1000000 [12:58:03<58:16, 25.74it/s]global step 910000, trans_decision ep_re 63.3126902470303

{"global_step": 910000, "eval_re": [7.1052131120374264, 5.267747457909202, 
93.31094434190862, 480.12952840791445, 9.956975630781246, 11.27929093583857, 
12.069669069263872, 1.560474949391292, 9.521621718960509, 2.925436846297726], 
"eval_len": [21, 28, 159, 186, 28, 28, 29, 12, 28, 19]}

 92%|█████████▏| 919997/1000000 [13:06:50<52:18, 25.49it/s]global step 920000, trans_decision ep_re 29.91212447003536

{"global_step": 920000, "eval_re": [6.1643906328827125, -7.0011805102896485, 
9.972151882858284, 1.5721414683133401, 252.64980066762115, 5.388525733357408, 
9.943702215030173, 2.1182319293055145, 10.861705562382417, 7.4517751188922405], 
"eval_len": [17, 21, 25, 16, 146, 18, 22, 19, 25, 20]}

 93%|█████████▎| 929998/1000000 [13:15:20<44:06, 26.45it/s]global step 930000, trans_decision ep_re 33.97175776227552

{"global_step": 930000, "eval_re": [8.876655601419856, 2.5863821093127357, 
1.4072088981516508, 5.028053554064149, 288.7894702912074, 4.008251024726225, 
3.1708578799487785, 14.996528446491999, 2.7542235258100187, 8.09994629162249], 
"eval_len": [19, 18, 15, 14, 145, 14, 29, 28, 27, 28]}

 94%|█████████▍| 939997/1000000 [13:23:50<38:40, 25.85it/s]global step 940000, trans_decision ep_re 70.28851593899233

{"global_step": 940000, "eval_re": [12.419620681733456, 2.6668386774580317, 
338.7517141762363, 17.207520999337778, 286.7638599497411, 2.939159167936639, 
1.8654165869520707, 14.66396435802931, 6.890126621789678, 18.716938170708907], 
"eval_len": [31, 17, 157, 31, 182, 18, 16, 26, 17, 40]}

 95%|█████████▍| 949999/1000000 [13:32:20<32:21, 25.75it/s]global step 950000, trans_decision ep_re 31.33218936819714

{"global_step": 950000, "eval_re": [19.104499487861236, 6.504277904093726, 
2.0104026024243487, 3.767537012234436, 2.8943414022490597, 8.805040190310947, 
233.19395686277898, 12.260598489183076, 20.497817589982407, 4.283422140853218], 
"eval_len": [28, 21, 20, 64, 24, 26, 162, 25, 56, 16]}

 96%|█████████▌| 959999/1000000 [13:40:50<25:54, 25.73it/s]global step 960000, trans_decision ep_re 39.4449428418305

{"global_step": 960000, "eval_re": [-2.2232517164152545, 55.97196813294929, 
1.2031198661662312, 4.069816314454095, 8.80088211236792, 8.518998516538247, 
311.51587220999716, 2.386897495907345, 1.7913729177850275, 2.4137525685549717], 
"eval_len": [18, 79, 27, 17, 20, 22, 175, 19, 19, 16]}

 97%|█████████▋| 969997/1000000 [13:49:12<19:10, 26.09it/s]global step 970000, trans_decision ep_re 32.5512043305199

{"global_step": 970000, "eval_re": [-0.18255090848027827, 5.308513186977833, 
1.1452387329804747, 6.374478414822462, 5.081572630526818, 3.978808797079128, 
11.273986941796576, 292.0801215180154, 2.2850115129959567, -1.833137521515293], 
"eval_len": [16, 21, 14, 27, 14, 15, 26, 174, 13, 20]}

 98%|█████████▊| 979998/1000000 [13:57:42<12:39, 26.32it/s]global step 980000, trans_decision ep_re 35.536044156640514

{"global_step": 980000, "eval_re": [146.52121188119844, -1.2690958528350427, 
6.652775683956306, 158.32640974444058, 3.0094147146662165, 3.6210417548277283, 
2.373654168055383, 14.40703334624094, 6.615086698233831, 15.10290942762077], 
"eval_len": [191, 16, 23, 152, 19, 20, 17, 26, 22, 28]}

 99%|█████████▉| 989997/1000000 [14:06:13<06:25, 25.93it/s]global step 990000, trans_decision ep_re 42.701368949378775

{"global_step": 990000, "eval_re": [99.28947351180265, 3.0167941614949743, 
32.9497780334723, 10.858295374105, 6.3015094639097535, 7.025344971418265, 
6.597076453486418, 7.045932112873914, 3.757819784891747, 250.17166562633273], 
"eval_len": [88, 16, 70, 43, 27, 24, 19, 29, 13, 181]}

100%|█████████▉| 999997/1000000 [14:14:43<00:00, 25.90it/s]global step 1000000, trans_decision ep_re 14.97657914574302

{"global_step": 1000000, "eval_re": [4.505269966703061, 13.07080501807472, 
3.478631240556418, 7.289723762114528, 0.6329238907679198, -1.9136456110184534, 
109.81986943519517, 4.8896599644180245, 4.523231799736967, 3.4693219908818245], 
"eval_len": [17, 22, 28, 27, 13, 14, 84, 25, 15, 14]}

100%|██████████| 1000000/1000000 [14:14:56<00:00, 19.49it/s]
