
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 4,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)'
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9998/1000000 [04:32<20:50:13, 13.20it/s]global step 10000, trans_decision ep_re 745.4761022605504

{"global_step": 10000, "eval_re": [730.1275997796267, 746.9330722790472, 
756.9122058295497, 756.9108800380184, 750.9120824398705, 758.9734791817069, 
748.4928357145329, 719.4532064177483, 745.8822084279627, 740.1634524974407], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19999/1000000 [11:25<5:39:27, 48.12it/s]global step 20000, trans_decision ep_re 366.8895859756632

{"global_step": 20000, "eval_re": [709.6529454397496, 91.54283626474319, 
762.5183225083437, 80.02918105131076, 686.0849801391613, 642.1744862788997, 
543.6995705965325, 33.308027397294, 61.01042206633679, 58.875088014259916], 
"eval_len": [1000, 90, 1000, 117, 1000, 1000, 1000, 49, 70, 71]}

  3%|▎         | 29999/1000000 [17:35<28:07:57,  9.58it/s]global step 30000, trans_decision ep_re 387.42555113886476

{"global_step": 30000, "eval_re": [382.55642682263226, 816.1315846703999, 
759.8270244277794, 14.782735739654056, 284.40165163084276, 594.428914376894, 
8.849042380866408, 878.6097521195742, 14.497118594455596, 120.17126062554922], 
"eval_len": [539, 1000, 1000, 16, 405, 670, 13, 1000, 14, 152]}

  4%|▍         | 39998/1000000 [23:15<7:13:38, 36.90it/s]global step 40000, trans_decision ep_re 896.8464711759154

{"global_step": 40000, "eval_re": [889.2257974735641, 914.8238489136156, 
910.1936330969993, 41.824458991765134, 875.0503443928422, 1070.8614526812926, 
950.2186377513824, 1170.431072196703, 1066.6063830760024, 1079.2290831849864], 
"eval_len": [1000, 860, 1000, 30, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49995/1000000 [28:45<5:31:46, 47.72it/s]global step 50000, trans_decision ep_re 795.1276575220343

{"global_step": 50000, "eval_re": [1118.917044964849, 902.5890791437857, 
921.8888253325681, 501.7842819869468, 964.4722355381471, 900.5304208816021, 
91.29839493436103, 866.5973456672743, 778.227828169687, 904.9711186011209], 
"eval_len": [1000, 1000, 681, 345, 716, 1000, 67, 851, 1000, 1000]}

  6%|▌         | 59997/1000000 [33:45<5:25:09, 48.18it/s]global step 60000, trans_decision ep_re 536.0945154811374

{"global_step": 60000, "eval_re": [916.6566420871765, 972.9415090751886, 
275.69668020605826, 832.9388931122658, 120.82546722192154, 920.8314251808349, 
637.3880099032548, 125.06298408331097, 67.40039464655095, 491.203149294812], 
"eval_len": [628, 1000, 206, 1000, 76, 689, 388, 98, 49, 304]}

  7%|▋         | 69997/1000000 [38:25<5:29:55, 46.98it/s]global step 70000, trans_decision ep_re 975.1082704303348

{"global_step": 70000, "eval_re": [910.0192828300444, 871.556843853919, 
383.01251927827474, 843.5340929241582, 1352.2146974679633, 936.419367726258, 
1361.214523330429, 557.4613098274086, 884.1952827579356, 1651.454784306957], 
"eval_len": [1000, 1000, 228, 1000, 951, 1000, 880, 434, 1000, 1000]}

  8%|▊         | 79997/1000000 [43:15<5:20:10, 47.89it/s]global step 80000, trans_decision ep_re 394.70154754568335

{"global_step": 80000, "eval_re": [62.401734962047925, 380.6620642426499, 
61.658576500093794, 678.6439887940184, 72.20120062816574, 91.35562056512839, 
580.2725362520788, 1095.013885798934, 859.2121595924962, 65.59370812122013], 
"eval_len": [46, 326, 40, 484, 46, 42, 288, 1000, 1000, 45]}

  9%|▉         | 89996/1000000 [48:15<5:11:25, 48.70it/s]global step 90000, trans_decision ep_re 1040.7995029994784

{"global_step": 90000, "eval_re": [585.9943358379454, 230.7712989074002, 
1046.0477705292149, 1792.4053175286695, 502.20941737326774, 362.36513235890067, 
1819.6187373445846, 1726.7758353697793, 495.140443940594, 1846.666740804427], 
"eval_len": [341, 120, 603, 1000, 298, 220, 1000, 1000, 261, 1000]}

 10%|▉         | 99999/1000000 [53:15<5:11:03, 48.22it/s]global step 100000, trans_decision ep_re 756.3051311892866

{"global_step": 100000, "eval_re": [424.45867586335294, 442.7293437105727, 
1436.4204018882613, 888.9282275967812, 1234.8316709371802, 57.629605575653834, 
1627.104843802292, 351.07614194663114, 1067.4327679464263, 32.439632625713486], 
"eval_len": [263, 239, 1000, 539, 746, 37, 1000, 219, 687, 38]}

 11%|█         | 109995/1000000 [58:05<5:17:34, 46.71it/s]global step 110000, trans_decision ep_re 1071.233868434122

{"global_step": 110000, "eval_re": [792.7134540238354, 1394.0088525983224, 
767.1955804979939, 364.28856469042364, 447.1307173887281, 1318.2354279115903, 
2014.6567806660257, 493.62510029368246, 1399.4204063403683, 1721.06379993025], 
"eval_len": [418, 751, 430, 189, 239, 614, 1000, 270, 1000, 1000]}

 12%|█▏        | 119995/1000000 [1:02:55<5:06:27, 47.86it/s]global step 120000, trans_decision ep_re 1015.096934072821

{"global_step": 120000, "eval_re": [28.244148162963892, 745.3961023343368, 
1907.8950678606452, 966.968073740646, 117.21680699773806, 1900.8026289185932, 
617.9719169390785, 891.8711919376146, 1187.3152854427437, 1787.2881183938493], 
"eval_len": [16, 404, 898, 414, 59, 1000, 1000, 475, 560, 745]}

 13%|█▎        | 129998/1000000 [1:07:45<5:04:54, 47.56it/s]global step 130000, trans_decision ep_re 815.8366265300045

{"global_step": 130000, "eval_re": [1116.8422137691612, 2208.903690202054, 
282.47196928864656, 1044.27024087284, 23.749333973900946, 785.3328297572762, 
261.87530128435196, 311.1167637485179, 779.0983796021837, 1344.7055428011133], 
"eval_len": [474, 1000, 122, 474, 15, 327, 164, 140, 1000, 576]}

 14%|█▍        | 139995/1000000 [1:12:35<4:55:48, 48.45it/s]global step 140000, trans_decision ep_re 1472.3554350130762

{"global_step": 140000, "eval_re": [1996.9967818961736, 2102.1565384583537, 
1954.9847319510504, 414.8888885192472, 2155.691617090196, 698.7940287248417, 
131.62301351320204, 2104.773155572095, 2105.8495633304633, 1057.7960310751384], 
"eval_len": [948, 963, 1000, 189, 989, 329, 71, 1000, 1000, 571]}

 15%|█▍        | 149998/1000000 [1:17:35<4:51:42, 48.56it/s]global step 150000, trans_decision ep_re 346.8847585085131

{"global_step": 150000, "eval_re": [174.6208265608648, 190.2736461728157, 
40.72150858447708, 57.94764215443321, 191.94546493710806, 795.8424582818558, 
999.5273827943518, 929.2455292596934, 37.176306459330974, 51.5468198801995], 
"eval_len": [69, 1000, 31, 34, 95, 1000, 476, 1000, 25, 33]}

 16%|█▌        | 159999/1000000 [1:22:10<4:51:29, 48.03it/s]global step 160000, trans_decision ep_re 97.32287895425029

{"global_step": 160000, "eval_re": [789.1000518948143, -468.09747867289957, 
112.43265131201542, 197.58465865607772, 59.64480561050825, 60.60767865880407, 
33.22500175277912, 20.367752997438643, 123.6732094163166, 44.69045791664829], 
"eval_len": [318, 1000, 94, 100, 37, 1000, 24, 17, 46, 26]}

 17%|█▋        | 169997/1000000 [1:27:15<4:45:49, 48.40it/s]global step 170000, trans_decision ep_re 508.2111727004191

{"global_step": 170000, "eval_re": [1272.776781163626, 1310.6898922027883, 
97.31820718840447, 156.8142596757474, -106.29963997940949, -108.16017488582703, 
145.9828638086499, 813.9690562142736, 358.71856960033665, 1140.301912015602], 
"eval_len": [616, 1000, 59, 71, 1000, 1000, 78, 1000, 158, 463]}

 18%|█▊        | 179999/1000000 [1:32:15<4:44:37, 48.02it/s]global step 180000, trans_decision ep_re 913.4902836508836

{"global_step": 180000, "eval_re": [773.593171601283, 2842.3053536357693, 
443.0013058450261, -454.16635383889127, 1051.174460788405, 731.5348807689641, 
2645.549455835691, -463.06901561686914, 593.6097256458781, 971.3698518435812], 
"eval_len": [319, 1000, 213, 1000, 398, 291, 1000, 1000, 233, 1000]}

 19%|█▉        | 189995/1000000 [1:37:05<4:39:38, 48.28it/s]global step 190000, trans_decision ep_re 32.89536625896874

{"global_step": 190000, "eval_re": [-39.65473054995001, 42.46644503490066, 
-135.66907469556315, 209.37924541856222, 116.71926004674562, 72.65923909517792, 
35.80217394179583, 126.08207811222582, -75.6366490199256, -23.194324794281904], 
"eval_len": [1000, 52, 1000, 1000, 135, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199997/1000000 [1:42:05<4:53:00, 45.51it/s]global step 200000, trans_decision ep_re 870.3743182529093

{"global_step": 200000, "eval_re": [1145.6389978773764, 299.74504129205803, 
351.6321888394659, 795.0969443506211, 2406.557311148701, 394.88436941986475, 
174.35705929415042, 381.26209382931137, 1031.3946492396976, 1723.1745272378466],
"eval_len": [583, 129, 128, 404, 1000, 166, 92, 151, 472, 1000]}

 21%|██        | 209997/1000000 [1:46:55<4:32:59, 48.23it/s]global step 210000, trans_decision ep_re 2278.077792392365

{"global_step": 210000, "eval_re": [2798.566003532202, 2736.2220870321967, 
2710.7771236463614, 2731.060946446621, 1551.0419835986786, 2728.347908292501, 
816.3345670050139, 2840.77193844598, 1230.8747118993033, 2636.7806540247866], 
"eval_len": [1000, 1000, 1000, 1000, 549, 1000, 320, 1000, 457, 1000]}

 22%|██▏       | 219997/1000000 [1:51:55<4:33:05, 47.60it/s]global step 220000, trans_decision ep_re 1151.937488520083

{"global_step": 220000, "eval_re": [242.02969495703195, 1738.5223812049344, 
1278.391501535051, 63.81422286481141, 563.4189040199951, 1230.0805871787963, 
652.4148859660136, 709.0340056960218, 3075.037630966621, 1966.6310708115536], 
"eval_len": [95, 577, 379, 48, 234, 426, 289, 240, 1000, 692]}

 23%|██▎       | 229999/1000000 [1:56:46<4:26:53, 48.08it/s]global step 230000, trans_decision ep_re 1868.0428796083197

{"global_step": 230000, "eval_re": [2184.0713077044543, 1053.956083876054, 
3045.5420274428625, 798.5018227357241, 1048.2534723870388, 2957.4846195942264, 
541.2573628763727, 3108.037401070635, 1537.056738057374, 2406.267960338457], 
"eval_len": [816, 390, 1000, 253, 411, 1000, 230, 1000, 546, 793]}

 24%|██▍       | 239999/1000000 [2:01:36<4:22:16, 48.30it/s]global step 240000, trans_decision ep_re 2178.5461885118034

{"global_step": 240000, "eval_re": [2572.8959576395655, 1439.5883051948845, 
2596.091917764341, 2629.405207093272, 1376.8967519797086, 2503.668093545013, 
1133.2175805383936, 2567.211321808369, 2554.080987482147, 2412.4057620723406], 
"eval_len": [1000, 655, 1000, 995, 579, 1000, 389, 1000, 1000, 1000]}

 25%|██▍       | 249999/1000000 [2:06:26<4:18:24, 48.37it/s]global step 250000, trans_decision ep_re 1051.4442402134346

{"global_step": 250000, "eval_re": [951.9890383986873, 594.3874677930492, 
1499.2497702249066, 444.2876379145587, 2012.8123822684877, 2069.449621330736, 
186.4326025436831, 612.4781984203378, 337.33406918822874, 1806.0216140516693], 
"eval_len": [316, 199, 1000, 199, 629, 652, 79, 198, 121, 524]}

 26%|██▌       | 259995/1000000 [2:11:26<4:16:56, 48.00it/s]global step 260000, trans_decision ep_re 1276.34027613252

{"global_step": 260000, "eval_re": [1331.2727427678851, 301.6268198866184, 
1350.318472181072, 3148.3352166919253, 1350.136469332734, 546.4115394188199, 
185.28585234494693, 860.6505809700159, 811.0671493381938, 2878.2979183929897], 
"eval_len": [366, 145, 403, 963, 400, 191, 103, 254, 254, 890]}

 27%|██▋       | 269997/1000000 [2:16:16<4:20:45, 46.66it/s]global step 270000, trans_decision ep_re 442.6029349041937

{"global_step": 270000, "eval_re": [994.7067493877547, 81.15760677361885, 
37.205258584698896, 882.5469881927584, 583.2179687580593, 501.81080488890376, 
140.9518901767473, 622.745504612842, 46.82382276939332, 534.862754897161], 
"eval_len": [1000, 49, 27, 271, 185, 261, 61, 206, 31, 241]}

 28%|██▊       | 279999/1000000 [2:20:46<4:08:08, 48.36it/s]global step 280000, trans_decision ep_re 1324.8642904117003

{"global_step": 280000, "eval_re": [21.784859558698248, 406.31052701519513, 
3511.810717167646, 913.0373680595202, 3273.1758574837586, 608.1743585567893, 
2023.9530766902503, 1395.8710516198475, 963.340864310133, 131.18422365516452], 
"eval_len": [19, 113, 1000, 303, 1000, 175, 627, 433, 292, 46]}

 29%|██▉       | 289995/1000000 [2:25:56<4:05:19, 48.24it/s]global step 290000, trans_decision ep_re 2091.946278873156

{"global_step": 290000, "eval_re": [3221.319583508618, 2337.2254273700973, 
3067.432234558339, 196.765031929372, 312.19326991138894, 2866.2049366480956, 
1129.3698688534528, 2280.152299139772, 2463.3760142405254, 3045.4241225719], 
"eval_len": [1000, 700, 948, 72, 112, 1000, 383, 1000, 1000, 969]}

 30%|██▉       | 299998/1000000 [2:30:56<4:04:47, 47.66it/s]global step 300000, trans_decision ep_re 1371.8654807946164

{"global_step": 300000, "eval_re": [1232.9475463758438, 1002.5880300866481, 
805.7134670189196, 56.09190330106956, 2916.1499426724863, 2762.7886645914464, 
200.34870905385407, 463.4039988174618, 3307.0394783550705, 971.5830676733647], 
"eval_len": [447, 332, 259, 34, 1000, 832, 59, 164, 1000, 300]}

 31%|███       | 309998/1000000 [2:35:46<6:01:44, 31.79it/s]global step 310000, trans_decision ep_re 1832.4749483515589

{"global_step": 310000, "eval_re": [1018.9653422784934, 578.6784877931117, 
23.144794140821606, 2737.2016495759567, 3128.623556763661, 3187.428711312667, 
2319.770802896312, 900.0061324209026, 3127.2648317869102, 1303.6651745467523], 
"eval_len": [318, 237, 23, 1000, 1000, 1000, 788, 294, 1000, 475]}

 32%|███▏      | 319997/1000000 [2:40:46<3:54:37, 48.31it/s]global step 320000, trans_decision ep_re 1262.1553031891779

{"global_step": 320000, "eval_re": [515.9537262500897, 1701.8048924309487, 
3014.334577587045, 3570.6952453644726, 680.9790925794844, 258.6564778588091, 
36.304427304792, 472.51904503468757, 448.8385267186771, 1921.4670207627726], 
"eval_len": [136, 546, 905, 1000, 253, 75, 28, 165, 129, 514]}

 33%|███▎      | 329998/1000000 [2:45:36<4:01:59, 46.15it/s]global step 330000, trans_decision ep_re 2053.1434616240313

{"global_step": 330000, "eval_re": [2715.4986777748522, 3552.3226601396177, 
2599.5623411860365, 2556.841469988962, 304.27656754330536, 450.05382456053724, 
277.5567990532262, 3097.4041676176607, 3728.627535442041, 1249.290572934075], 
"eval_len": [765, 1000, 800, 684, 107, 123, 134, 875, 1000, 347]}

 34%|███▍      | 339997/1000000 [2:50:26<3:47:17, 48.40it/s]global step 340000, trans_decision ep_re 1629.5735536605223

{"global_step": 340000, "eval_re": [1708.8687781049819, 1015.2284943289153, 
111.41307134560479, 190.76251677637816, 3074.544183030597, 2951.8398687012723, 
3003.1171698080593, 2316.4441923224567, 1704.0387139483796, 219.47854823857836],
"eval_len": [506, 354, 58, 78, 1000, 1000, 1000, 700, 548, 114]}

 35%|███▍      | 349997/1000000 [2:55:03<3:44:50, 48.18it/s]global step 350000, trans_decision ep_re 526.6886315883346

{"global_step": 350000, "eval_re": [229.294402538759, 1074.381899982201, 
25.477215382562363, 434.3652588377832, 186.52751365400738, 1904.1919886440076, 
26.831776250910572, 1162.7048890537126, 142.74819626971518, 80.36317526968757], 
"eval_len": [93, 324, 17, 135, 97, 567, 19, 387, 58, 38]}

 36%|███▌      | 359996/1000000 [3:08:21<3:40:41, 48.33it/s]global step 360000, trans_decision ep_re 1329.8468746551966

{"global_step": 360000, "eval_re": [955.644553506525, 1811.0172796551528, 
170.85779511908117, 2164.7475221101804, 2732.327077959948, 1520.458768916332, 
769.2675670210366, 1635.245960091234, 978.796291645565, 560.1059305269098], 
"eval_len": [289, 534, 82, 652, 885, 501, 227, 502, 271, 213]}

 37%|███▋      | 369998/1000000 [3:12:53<3:41:18, 47.44it/s]global step 370000, trans_decision ep_re 1081.2224297608561

{"global_step": 370000, "eval_re": [1201.7548609060145, 157.53581441810798, 
864.0225676914353, 709.0100154813115, 322.48358927464744, 1395.898459441452, 
2396.533991576377, 527.8947939468356, 3143.7096876982346, 93.38051717414534], 
"eval_len": [358, 64, 272, 242, 112, 416, 755, 161, 1000, 41]}

 38%|███▊      | 379997/1000000 [3:17:51<3:35:27, 47.96it/s]global step 380000, trans_decision ep_re 1094.3489596602262

{"global_step": 380000, "eval_re": [2899.063248765382, 999.1927144657575, 
876.0195335340561, 1450.19511384783, 812.90432127725, 84.05993695729435, 
355.7298625687918, 58.57870280144662, 1355.370879233446, 2052.3752831510087], 
"eval_len": [885, 1000, 253, 463, 263, 42, 166, 33, 414, 573]}

 39%|███▉      | 389995/1000000 [3:22:51<3:31:49, 48.00it/s]global step 390000, trans_decision ep_re 1242.9685385936314

{"global_step": 390000, "eval_re": [1185.286618702059, 230.3503995162305, 
1055.0557800644146, 1347.0930994967136, 3054.930668672485, 720.028287658576, 
3176.4855823780304, 614.7516207033582, 35.72031678928227, 1009.9830119551622], 
"eval_len": [407, 87, 405, 479, 1000, 237, 1000, 211, 31, 336]}

 40%|███▉      | 399996/1000000 [3:27:31<3:29:35, 47.71it/s]global step 400000, trans_decision ep_re 2058.559788955056

{"global_step": 400000, "eval_re": [3231.494842476659, 1160.4129761812137, 
1385.729066674441, 3060.897263020243, 2154.2568405286825, 2772.8855312881014, 
2124.3370931447, 1035.7042796000944, 2915.101435845227, 744.778560791197], 
"eval_len": [1000, 358, 1000, 974, 728, 881, 741, 343, 889, 220]}

 41%|████      | 409998/1000000 [3:32:31<3:27:40, 47.35it/s]global step 410000, trans_decision ep_re 1588.9832428613431

{"global_step": 410000, "eval_re": [580.6586796428215, 289.4743204796539, 
478.58208664975564, 3408.4258456381053, 975.0289030957549, 3168.9071196175405, 
3370.292039854397, 1825.7539982972066, 533.1134377392641, 1259.5959975989313], 
"eval_len": [240, 110, 152, 1000, 304, 1000, 1000, 589, 199, 1000]}

 42%|████▏     | 419995/1000000 [3:37:12<3:19:26, 48.47it/s]global step 420000, trans_decision ep_re 1086.792353185301

{"global_step": 420000, "eval_re": [249.94656844487722, 147.4375704098085, 
3616.0367294113344, 609.1233008201541, 3522.530551934209, 86.07569528676046, 
486.3437060201802, 58.603003628208434, 1213.5568410135388, 878.2695648839391], 
"eval_len": [79, 69, 1000, 164, 1000, 41, 138, 33, 306, 284]}

 43%|████▎     | 429999/1000000 [3:42:11<3:16:52, 48.25it/s]global step 430000, trans_decision ep_re 1552.946573321447

{"global_step": 430000, "eval_re": [2710.556575487275, 3126.3897237765163, 
1152.776187531764, 119.46650692544975, 1050.4237704489678, 1950.0716287316793, 
2184.2283276025823, 1443.7639162562518, 944.7194720449604, 847.0696244090216], 
"eval_len": [746, 1000, 377, 52, 289, 565, 680, 447, 272, 262]}

 44%|████▍     | 439996/1000000 [3:47:01<3:17:15, 47.32it/s]global step 440000, trans_decision ep_re 1722.7445421999516

{"global_step": 440000, "eval_re": [3240.3003717108527, 295.8566639464462, 
63.57208253417811, 3468.865911376882, 3993.067242300108, 356.922286024337, 
3698.886280050095, 1641.1525022521541, 334.6674873613434, 134.1545944431197], 
"eval_len": [884, 93, 1000, 1000, 1000, 134, 1000, 414, 103, 56]}

 45%|████▍     | 449997/1000000 [3:51:51<3:07:52, 48.79it/s]global step 450000, trans_decision ep_re 1290.202453868887

{"global_step": 450000, "eval_re": [1263.0237555606143, 1575.1053227837021, 
1312.0343403841418, 747.5921269470829, 515.5255588889892, 1612.5185676464912, 
742.5310491667204, 781.8365040972169, 3514.0106309360767, 837.8466822778336], 
"eval_len": [341, 1000, 446, 270, 181, 1000, 271, 254, 1000, 301]}

 46%|████▌     | 459999/1000000 [3:56:32<3:07:22, 48.03it/s]global step 460000, trans_decision ep_re 1280.1961731051101

{"global_step": 460000, "eval_re": [567.1317298365583, 2037.45763825871, 
34.58321123596316, 1905.8024950390127, 3165.5392797619656, 657.4677176346069, 
1808.936495220217, 2174.775006012712, 280.87237182284736, 169.3957862285087], 
"eval_len": [204, 630, 26, 558, 928, 168, 563, 610, 79, 79]}

 47%|████▋     | 469997/1000000 [4:01:31<3:04:40, 47.83it/s]global step 470000, trans_decision ep_re 934.534768011217

{"global_step": 470000, "eval_re": [1191.24736432028, 860.0764616971061, 
302.6531749373897, 666.9299818036199, 378.16793516582226, 1453.596102009038, 
296.5038496115968, 1888.0960401665366, 466.54103330495803, 1841.5357370958238], 
"eval_len": [345, 221, 117, 223, 105, 396, 103, 474, 150, 485]}

 48%|████▊     | 479997/1000000 [4:06:03<2:59:10, 48.37it/s]global step 480000, trans_decision ep_re 612.0297769680544

{"global_step": 480000, "eval_re": [1509.6111892556592, 293.90239954986396, 
733.941497203693, 107.74040810263305, 143.08501200457232, 990.6877010520285, 
414.466018646205, 526.6690839981928, 588.0490099190845, 812.1454499486122], 
"eval_len": [392, 99, 1000, 46, 58, 293, 133, 172, 189, 248]}

 49%|████▉     | 489997/1000000 [4:11:02<2:57:18, 47.94it/s]global step 490000, trans_decision ep_re 1107.450073198796

{"global_step": 490000, "eval_re": [1700.3552982971737, 1870.9257183102297, 
126.3302730911085, 415.05028222018205, 1029.3216343730226, 997.9129750846528, 
1977.4290594534932, 476.03611309795576, 196.05112835651505, 2285.088249703626], 
"eval_len": [1000, 528, 67, 135, 301, 280, 511, 201, 71, 618]}

 50%|████▉     | 499999/1000000 [4:15:35<2:53:47, 47.95it/s]global step 500000, trans_decision ep_re 972.8249579870129

{"global_step": 500000, "eval_re": [395.4435029552577, 107.97432589674037, 
9.232467413965683, 2273.4315458018905, 3650.786442368233, 1360.5336415542563, 
176.31493938811212, 263.80345643527147, 447.15486617572367, 1043.5743918806784],
"eval_len": [124, 48, 23, 657, 1000, 406, 75, 77, 125, 286]}

 51%|█████     | 509999/1000000 [4:20:32<2:50:48, 47.81it/s]global step 510000, trans_decision ep_re -2018.6571565392733

{"global_step": 510000, "eval_re": [-1942.3279105588701, -2273.911928979304, 
-2030.667018367214, -1791.8649603655408, -2369.831926969648, 
-1992.3431405907806, -1603.663576944704, -1870.7736159681276, 
-2080.2258879933115, -2230.9615986552317], "eval_len": [1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519995/1000000 [4:25:42<2:44:55, 48.51it/s]global step 520000, trans_decision ep_re -98.87255112406106

{"global_step": 520000, "eval_re": [-211.24520208411803, -353.45218161034086, 
-18.013420549755192, 7.098535951308055, 20.758039531702774, 19.64546736876791, 
41.63288893247156, 25.233213718751774, -530.7080875414821, 10.32523504208342], 
"eval_len": [1000, 1000, 1000, 106, 1000, 18, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529997/1000000 [4:30:42<2:43:06, 48.02it/s]global step 530000, trans_decision ep_re 1169.301114525538

{"global_step": 530000, "eval_re": [1383.0407709861079, 1508.0900493964762, 
1903.9952848683326, 765.046169094055, 181.93202639419255, 1582.8727017902365, 
276.77324528857946, 2007.5684487584329, 1949.4711715326428, 134.2212771463213], 
"eval_len": [386, 398, 522, 257, 87, 537, 97, 584, 573, 67]}

 54%|█████▍    | 539998/1000000 [4:35:12<2:38:39, 48.32it/s]global step 540000, trans_decision ep_re 1038.5253014479827

{"global_step": 540000, "eval_re": [2078.8772866174054, 258.6320762775154, 
663.8617219055782, 1146.6670839996764, 1085.970822259748, 451.1421947623289, 
307.74695512785877, 1488.5333676432647, 26.37993820710007, 2877.44156767935], 
"eval_len": [679, 82, 197, 323, 286, 147, 116, 413, 23, 865]}

 55%|█████▍    | 549995/1000000 [4:40:12<2:35:43, 48.16it/s]global step 550000, trans_decision ep_re 1917.4810509843733

{"global_step": 550000, "eval_re": [636.3080339555307, 1114.6060519630955, 
1718.5870557093372, 3093.266167561153, 515.1994777995333, 255.1011835751302, 
1423.2972854595628, 3660.0922818991894, 2912.8218941345162, 3845.5310777866844],
"eval_len": [205, 304, 535, 816, 174, 120, 338, 1000, 763, 1000]}

 56%|█████▌    | 559995/1000000 [4:45:02<2:36:48, 46.77it/s]global step 560000, trans_decision ep_re 1230.9915710239152

{"global_step": 560000, "eval_re": [314.7152578545972, 2768.830992526265, 
1577.5543232669106, 548.8461528675482, 144.77592595867725, 115.35651760192192, 
2105.1000951466017, 1692.081217681318, 39.538169367892095, 3003.1170579674213], 
"eval_len": [113, 722, 480, 185, 56, 54, 578, 483, 29, 802]}

 57%|█████▋    | 569996/1000000 [4:49:52<2:27:39, 48.54it/s]global step 570000, trans_decision ep_re 1373.680485697268

{"global_step": 570000, "eval_re": [275.59511824984816, 435.8807135957703, 
95.64135617137326, 1253.4634408672255, 516.4109538113312, 431.8882546206888, 
3811.759801387561, 792.6101047368904, 2257.626032967315, 3865.929080564678], 
"eval_len": [91, 155, 36, 369, 1000, 134, 990, 293, 660, 1000]}

 58%|█████▊    | 579999/1000000 [4:54:42<2:25:13, 48.20it/s]global step 580000, trans_decision ep_re 2039.4059764228296

{"global_step": 580000, "eval_re": [3643.009914110119, 959.6979681582717, 
2891.373037665063, 663.7472729386077, 2220.4907411486547, 3473.497698201244, 
366.58459455180184, 958.11435099992, 2832.3435986256395, 2385.200587828973], 
"eval_len": [1000, 323, 821, 164, 1000, 1000, 143, 1000, 1000, 645]}

 59%|█████▉    | 589997/1000000 [4:59:26<2:25:25, 46.99it/s]global step 590000, trans_decision ep_re 996.999596069847

{"global_step": 590000, "eval_re": [220.62702861638903, 1261.5335102504869, 
1666.675041582177, 277.69269560525424, 2085.55132904249, 557.2213814230803, 
1241.8429233678262, 613.4005542627143, 1375.4604910769704, 669.9910054710812], 
"eval_len": [67, 368, 444, 84, 522, 168, 319, 167, 342, 168]}

 60%|█████▉    | 599999/1000000 [5:04:22<2:19:10, 47.90it/s]global step 600000, trans_decision ep_re 1864.5366270460297

{"global_step": 600000, "eval_re": [1214.9332540413586, 609.9544298912039, 
2959.9050363454858, 3497.9233462694574, 25.577475483792277, 255.9453448395186, 
4081.3042372249374, 3874.030062356419, 124.04553797715589, 2001.747546030967], 
"eval_len": [325, 177, 764, 900, 22, 102, 1000, 1000, 56, 523]}

 61%|██████    | 609995/1000000 [5:09:12<2:14:12, 48.44it/s]global step 610000, trans_decision ep_re 1085.2955197016715

{"global_step": 610000, "eval_re": [3696.662452989244, 1074.9836840879889, 
1399.3535478455733, 658.2686545906203, 1976.6356500865345, 116.07219016026183, 
-434.4992646523457, 944.8656168172873, 593.7579515762726, 826.8547135152781], 
"eval_len": [1000, 364, 343, 200, 580, 55, 1000, 366, 170, 265]}

 62%|██████▏   | 619997/1000000 [5:14:02<2:11:26, 48.19it/s]global step 620000, trans_decision ep_re 1480.8992692760326

{"global_step": 620000, "eval_re": [3455.1899771121243, 3543.4240383986426, 
218.88505103562784, 614.4569423532039, 658.1217682519351, 446.2737621651036, 
2877.9703255107966, 1579.5603732542615, 495.862454781991, 919.2479998966397], 
"eval_len": [1000, 1000, 81, 1000, 248, 156, 1000, 472, 214, 295]}

 63%|██████▎   | 629995/1000000 [5:18:52<2:07:52, 48.22it/s]global step 630000, trans_decision ep_re 1664.846376050779

{"global_step": 630000, "eval_re": [3858.9856218485043, 1331.6012645758499, 
2371.6585964308833, 205.30395572019026, 2750.538074187796, 433.81306279280403, 
1985.0083821164833, 2753.286013361215, 227.53341723549985, 730.7353722385676], 
"eval_len": [1000, 1000, 672, 67, 1000, 119, 580, 812, 1000, 233]}

 64%|██████▍   | 639999/1000000 [5:23:52<2:04:30, 48.19it/s]global step 640000, trans_decision ep_re 1189.2900740262974

{"global_step": 640000, "eval_re": [661.3708872862734, 1344.8176631758088, 
1577.5357648861393, 899.5049856811406, 2249.9335536326876, 661.7561625483967, 
501.9298993398451, 242.35158191456676, 3309.393419182802, 444.306822615314], 
"eval_len": [1000, 333, 392, 268, 617, 185, 231, 83, 886, 206]}

 65%|██████▍   | 649997/1000000 [5:28:42<2:00:28, 48.42it/s]global step 650000, trans_decision ep_re 1849.133025883833

{"global_step": 650000, "eval_re": [2777.814595426013, 35.97888483403376, 
785.985524226207, 2966.1771770604514, 3542.832637071599, 1732.2932631896272, 
3975.069776128739, 883.2880811567388, 719.8387008545085, 1072.0516188904153], 
"eval_len": [1000, 29, 289, 791, 922, 416, 1000, 1000, 183, 290]}

 66%|██████▌   | 659995/1000000 [5:33:32<1:58:32, 47.80it/s]global step 660000, trans_decision ep_re 1219.7484167358123

{"global_step": 660000, "eval_re": [596.5521587820377, 803.0038674858702, 
774.5086474440643, 1735.8695957543364, 688.7370720067662, 290.4774551100323, 
538.0179104102685, 365.38321148919965, 4146.52329648266, 2258.4109523928873], 
"eval_len": [204, 220, 185, 436, 202, 118, 165, 128, 1000, 534]}

 67%|██████▋   | 669997/1000000 [5:38:12<1:57:55, 46.64it/s]global step 670000, trans_decision ep_re 1966.3593237043788

{"global_step": 670000, "eval_re": [2468.8122191167377, 3786.4901867969925, 
1507.6452234477988, 1241.9159169693626, 1997.4400964800025, 2539.1131986464247, 
3575.7682584699155, 1320.8190916504623, 1020.6375894361177, 204.9514560299718], 
"eval_len": [623, 1000, 436, 325, 518, 1000, 1000, 365, 231, 67]}

 68%|██████▊   | 679997/1000000 [5:43:12<1:54:06, 46.74it/s]global step 680000, trans_decision ep_re 2178.3275640310703

{"global_step": 680000, "eval_re": [2925.8958985162994, 2043.9401445176595, 
448.6534395546243, 1176.5827024451878, 3071.727009367468, 3214.7932497791703, 
3718.5393193889986, 1990.4673137542916, 260.1303922880477, 2932.5461706989545], 
"eval_len": [827, 597, 110, 359, 790, 1000, 958, 488, 81, 675]}

 69%|██████▉   | 689997/1000000 [5:48:02<1:46:23, 48.56it/s]global step 690000, trans_decision ep_re 1090.872883063121

{"global_step": 690000, "eval_re": [2362.3219512947267, 525.2994986224182, 
218.06928817071238, 1770.9254914309506, 1159.4087673549866, 3450.7458788462036, 
38.23131499958435, 745.9321233052932, 119.37481670661681, 518.4196998997182], 
"eval_len": [681, 140, 77, 474, 340, 979, 24, 242, 52, 168]}

 70%|██████▉   | 699999/1000000 [6:00:44<10:31:19,  7.92it/s]global step 700000, trans_decision ep_re 1450.0707107796666

{"global_step": 700000, "eval_re": [1123.5078946242331, 2344.0763534362673, 
98.42307379508934, 1907.0869147528683, 1422.516210040295, 3970.2686501807993, 
171.12417883994905, 272.1592731018633, 107.37284192476649, 3084.1717171005334], 
"eval_len": [328, 632, 44, 493, 455, 1000, 61, 82, 54, 765]}

 71%|███████   | 709997/1000000 [6:20:50<1:43:13, 46.83it/s]global step 710000, trans_decision ep_re 1081.3005754728867

{"global_step": 710000, "eval_re": [2678.382687657438, 503.5985783438395, 
102.8933728555122, 3309.6231629486465, 414.39436337608134, 1536.0619603741675, 
64.99423070206946, 160.96433505439583, 561.1963409845596, 1480.896722432157], 
"eval_len": [685, 140, 53, 1000, 135, 440, 38, 80, 175, 1000]}

 72%|███████▏  | 719997/1000000 [6:25:40<1:39:38, 46.83it/s]global step 720000, trans_decision ep_re 1746.9310774365053

{"global_step": 720000, "eval_re": [3405.2955658587266, 928.5126350159964, 
779.6337091359769, 58.393372097063065, 1071.0704356144915, 83.61599083816573, 
972.9547466514247, 3699.7511497879345, 3184.921631213972, 3285.161538151304], 
"eval_len": [884, 249, 201, 31, 330, 38, 264, 1000, 929, 1000]}

 73%|███████▎  | 729995/1000000 [6:30:30<1:33:16, 48.25it/s]global step 730000, trans_decision ep_re 1766.4149853060621

{"global_step": 730000, "eval_re": [2045.621235621229, 3390.7172728183878, 
3582.3964321081326, 1495.0142970003403, 132.28922920094985, 3253.876132057236, 
657.9950614817757, 1829.5831765192863, 68.67707441162464, 1207.9799418416578], 
"eval_len": [528, 1000, 947, 471, 50, 911, 248, 463, 31, 308]}

 74%|███████▍  | 739999/1000000 [6:35:20<1:29:06, 48.63it/s]global step 740000, trans_decision ep_re 1644.4739103827274

{"global_step": 740000, "eval_re": [1215.4306893200107, 4017.449574082641, 
308.30487443522173, 3148.474723647497, 2025.2083349683705, 2387.9102905082204, 
252.66468636278321, 1334.298330057157, 38.982193948513576, 1716.0154064968574], 
"eval_len": [331, 1000, 86, 743, 491, 592, 71, 350, 26, 494]}

 75%|███████▍  | 749999/1000000 [6:40:10<1:29:08, 46.74it/s]global step 750000, trans_decision ep_re 1364.0814684703385

{"global_step": 750000, "eval_re": [47.809779920645816, 1299.8167856247155, 
2290.8846705151213, 218.88654299112818, 1470.6479602460254, 457.42750366452793, 
3063.79979175006, 2406.615752082751, 1747.7019175195007, 637.2239803889096], 
"eval_len": [36, 304, 538, 70, 379, 122, 805, 595, 455, 1000]}

 76%|███████▌  | 759999/1000000 [6:45:00<1:23:24, 47.96it/s]global step 760000, trans_decision ep_re 1386.1689119751625

{"global_step": 760000, "eval_re": [1207.8655894569429, 24.04948910437219, 
105.66041215356596, 1518.5608326372646, 368.88196845579614, 3746.397459585619, 
206.6044728806453, 3091.649322495781, 3510.6542692263083, 81.36530375533029], 
"eval_len": [301, 18, 42, 478, 129, 1000, 97, 939, 1000, 40]}

 77%|███████▋  | 769997/1000000 [6:49:50<1:19:09, 48.42it/s]global step 770000, trans_decision ep_re 1788.3754718217

{"global_step": 770000, "eval_re": [2159.082125208715, 2145.5973534051586, 
3630.593182887884, 1159.5280292113603, 1570.0548294954092, 2677.774937133628, 
457.6007549387525, 797.6529056227855, 2675.3490414378543, 610.5215588754534], 
"eval_len": [608, 590, 1000, 284, 393, 642, 123, 258, 673, 154]}

 78%|███████▊  | 779996/1000000 [6:54:40<1:15:41, 48.45it/s]global step 780000, trans_decision ep_re 1382.367114404467

{"global_step": 780000, "eval_re": [223.05618036217055, 2031.2827562500238, 
886.0434042159844, -212.66520619440433, 856.992766763343, 2299.827683197036, 
1492.1571441368997, 3295.369187701807, 714.8117756079532, 2236.795452003855], 
"eval_len": [80, 547, 310, 1000, 257, 563, 1000, 845, 186, 614]}

 79%|███████▉  | 789999/1000000 [6:59:30<1:14:37, 46.90it/s]global step 790000, trans_decision ep_re 1364.2645452108852

{"global_step": 790000, "eval_re": [4301.374582113646, 1369.8096356168817, 
2180.3666374831273, 73.34537681725237, 681.0331030773499, 458.89498275866106, 
1392.9671752371892, 1227.5504325636837, 1691.512931328351, 265.79059511271106], 
"eval_len": [958, 388, 534, 43, 198, 122, 363, 377, 421, 93]}

 80%|███████▉  | 799996/1000000 [7:04:20<1:08:56, 48.35it/s]global step 800000, trans_decision ep_re 2080.952031615278

{"global_step": 800000, "eval_re": [4273.181134569096, 3731.559422515933, 
2967.971876267539, 2632.291527706787, 164.6615327685555, 995.1046291817131, 
197.96172863904434, 785.685378782343, 1082.543280581224, 3978.559805140542], 
"eval_len": [973, 1000, 731, 619, 57, 274, 64, 212, 310, 1000]}

 81%|████████  | 809996/1000000 [7:09:10<1:05:15, 48.53it/s]global step 810000, trans_decision ep_re 1604.5940527531307

{"global_step": 810000, "eval_re": [953.715154760185, 229.52244304611267, 
462.1078560697836, 511.93534158649226, 3799.785508565493, 903.3167076984814, 
2534.346484716281, 3137.3825296506175, 3496.2447175623056, 17.583783875556154], 
"eval_len": [1000, 92, 141, 143, 1000, 284, 1000, 810, 1000, 20]}

 82%|████████▏ | 819996/1000000 [7:14:00<1:03:19, 47.37it/s]global step 820000, trans_decision ep_re 890.0177393516342

{"global_step": 820000, "eval_re": [301.0417389984389, 1522.3285820769113, 
343.339217997818, 155.300967949377, 68.20763084729002, 550.5814921683075, 
3610.0064476517696, 1319.4686283436881, 153.65118429968504, 876.2515031830561], 
"eval_len": [107, 489, 135, 62, 51, 1000, 1000, 361, 87, 267]}

 83%|████████▎ | 829997/1000000 [7:18:50<58:39, 48.30it/s]global step 830000, trans_decision ep_re 1554.0455912884886

{"global_step": 830000, "eval_re": [2399.439111800751, 3456.0277088266625, 
3784.9184126774603, 829.1148860156932, 2354.2771231211555, 569.0930372833525, 
987.0175037331277, 607.3048365053887, 341.7233618929403, 211.53993102835418], 
"eval_len": [591, 1000, 1000, 234, 543, 147, 308, 169, 94, 97]}

 84%|████████▍ | 839999/1000000 [7:23:30<54:45, 48.71it/s]global step 840000, trans_decision ep_re 1432.5062576400817

{"global_step": 840000, "eval_re": [1906.1260039966774, 4053.3248736349797, 
3418.1649532572833, 573.7356665575545, 104.98743655645343, 1471.7096338356832, 
182.3386197034186, 119.65748650676217, 2810.5450773716493, -315.52717501964474],
"eval_len": [508, 977, 842, 159, 38, 420, 64, 52, 806, 1000]}

 85%|████████▍ | 849998/1000000 [7:28:20<51:38, 48.41it/s]global step 850000, trans_decision ep_re 1884.7617307544201

{"global_step": 850000, "eval_re": [1083.8154647021909, 56.7102299967812, 
3225.52644756524, 719.3735008769675, 1499.1796801907747, 4028.006595011116, 
751.0517679953863, 3413.165636245745, 2731.885202780812, 1338.9027821791897], 
"eval_len": [277, 39, 877, 188, 427, 1000, 252, 834, 738, 423]}

 86%|████████▌ | 859999/1000000 [7:33:10<49:50, 46.82it/s]global step 860000, trans_decision ep_re 1389.4169056072571

{"global_step": 860000, "eval_re": [2699.8026120677223, 993.5968477466256, 
1466.9057559990679, 1327.989691913022, 206.7982650184715, 1654.2436394997724, 
814.7394810347513, 370.2558294839737, 3815.0353799864524, 544.8015533227101], 
"eval_len": [697, 309, 419, 328, 68, 1000, 204, 135, 966, 132]}

 87%|████████▋ | 869999/1000000 [7:38:00<44:56, 48.20it/s]global step 870000, trans_decision ep_re 1627.0038950163898

{"global_step": 870000, "eval_re": [669.2712889878425, 3710.3704639190373, 
21.169893031349435, 4132.040294043369, 471.24035271177127, 2090.7688365851086, 
98.44350078819241, 2772.558593666731, 266.4450309926326, 2037.7306954378637], 
"eval_len": [201, 1000, 16, 1000, 127, 589, 63, 752, 74, 566]}

 88%|████████▊ | 879995/1000000 [7:42:50<42:05, 47.53it/s]global step 880000, trans_decision ep_re 1016.97732431043

{"global_step": 880000, "eval_re": [1236.604372060723, 1377.3378008355587, 
609.9747187393951, 326.2750167651418, 1064.9440965985696, 1070.4517091284638, 
2161.6172785190815, 1851.050111458537, 287.20811664327425, 184.3100223555557], 
"eval_len": [344, 406, 177, 107, 285, 362, 520, 457, 131, 66]}

 89%|████████▉ | 889995/1000000 [7:47:30<39:01, 46.99it/s]global step 890000, trans_decision ep_re 586.1623176218263

{"global_step": 890000, "eval_re": [60.13280569564877, 1238.8546881231346, 
2149.0318475201943, 927.1321659500102, 212.29496262097072, 46.85179356261572, 
526.1973120345898, 144.53542985686894, 273.34608827702954, 283.24608257720007], 
"eval_len": [44, 380, 631, 220, 65, 30, 130, 57, 88, 90]}

 90%|████████▉ | 899999/1000000 [7:52:10<34:36, 48.15it/s]global step 900000, trans_decision ep_re 1112.662210508215

{"global_step": 900000, "eval_re": [792.0170176547699, 1030.75608870532, 
262.46958080006146, 2055.971658467441, 1237.946822354831, 432.1127592869186, 
3203.303070899564, 1389.4726200598984, 629.8243648804385, 92.74812197290764], 
"eval_len": [234, 335, 90, 672, 389, 134, 945, 366, 235, 40]}

 91%|█████████ | 909995/1000000 [7:57:00<32:02, 46.82it/s]global step 910000, trans_decision ep_re 2180.2037439388178

{"global_step": 910000, "eval_re": [2386.122103904839, 1083.7484579899253, 
4068.5737300015594, 1101.4938407208176, 3947.3127776915835, 2363.2817275008106, 
65.7385840498104, 1082.0515245542847, 2103.2773368407147, 3600.43735613383], 
"eval_len": [1000, 288, 1000, 327, 1000, 635, 39, 310, 585, 1000]}

 92%|█████████▏| 919998/1000000 [8:01:50<27:54, 47.77it/s]global step 920000, trans_decision ep_re 957.136434463064

{"global_step": 920000, "eval_re": [504.3727322324093, 422.28048314708525, 
739.7064119449599, 2833.185355533694, 2150.9371891828137, 148.70355519135003, 
648.2907327225779, 457.9149141816873, 552.698192387757, 1113.2747781063063], 
"eval_len": [149, 145, 267, 709, 1000, 50, 185, 134, 1000, 308]}

 93%|█████████▎| 929997/1000000 [8:06:40<24:16, 48.06it/s]global step 930000, trans_decision ep_re 1029.587382004282

{"global_step": 930000, "eval_re": [841.8811212589757, 46.91655568991408, 
968.2185894976332, 1239.1835156135119, 365.6228461836301, 1102.6895791049153, 
91.40263816868016, 1686.4458316560908, 3533.21458240344, 420.29856046602816], 
"eval_len": [221, 42, 313, 351, 102, 243, 69, 459, 1000, 149]}

 94%|█████████▍| 939998/1000000 [8:11:30<20:53, 47.86it/s]global step 940000, trans_decision ep_re 1927.588490757304

{"global_step": 940000, "eval_re": [3301.4389499234317, 963.2515132489968, 
590.0243809688691, 2318.652026464467, 1655.6950248136104, 2111.6859812423736, 
3406.4804229076103, 1673.224164139292, 915.5998463617094, 2339.832597502677], 
"eval_len": [931, 256, 1000, 661, 508, 515, 856, 396, 288, 618]}

 95%|█████████▍| 949999/1000000 [8:16:20<17:24, 47.89it/s]global step 950000, trans_decision ep_re 2231.16377384699

{"global_step": 950000, "eval_re": [2808.4313274777796, 3329.6354770667836, 
2361.6426741149776, 445.9593812454793, 1241.246287136364, 3576.4371731088468, 
3965.5600356446653, 3835.218138313556, 424.5121320177798, 322.995112343669], 
"eval_len": [783, 1000, 529, 125, 381, 1000, 1000, 1000, 169, 100]}

 96%|█████████▌| 959998/1000000 [8:21:01<14:05, 47.32it/s]global step 960000, trans_decision ep_re 1064.7498266376358

{"global_step": 960000, "eval_re": [120.19244255048437, 2339.7562578692173, 
592.1361635756086, 2305.720908254119, 341.17680445103565, 401.6452492979543, 
221.79681245976556, 2535.3993569618065, 192.4987544655205, 1597.175516490847], 
"eval_len": [45, 695, 165, 517, 100, 144, 72, 597, 62, 1000]}

 97%|█████████▋| 969999/1000000 [8:26:01<10:28, 47.71it/s]global step 970000, trans_decision ep_re 1949.5614144480546

{"global_step": 970000, "eval_re": [3166.747811837034, 26.879411804477527, 
502.03004712050915, 2143.688792731941, 209.60815068381766, 3858.0301779160945, 
3180.7916236933147, 2057.57778847224, 891.1839624312468, 3459.0763777898687], 
"eval_len": [948, 25, 128, 537, 78, 1000, 781, 581, 216, 910]}

 98%|█████████▊| 979999/1000000 [8:30:51<06:55, 48.18it/s]global step 980000, trans_decision ep_re 836.6646973098841

{"global_step": 980000, "eval_re": [42.81986293394853, 233.10144927945154, 
1177.0716655772057, 1276.695816349582, 869.1830679711811, 277.19048290244825, 
841.1402104061661, 35.97087582914765, 2300.862419275826, 1312.6111225738837], 
"eval_len": [26, 75, 258, 366, 187, 82, 223, 27, 648, 358]}

 99%|█████████▉| 989999/1000000 [8:35:31<03:27, 48.15it/s]global step 990000, trans_decision ep_re 1442.1173239352715

{"global_step": 990000, "eval_re": [188.0570879639571, 149.44716604739858, 
3691.094511833686, 700.0071430850093, 4018.4495774375046, 1828.4908087458732, 
1014.8879567032784, 469.2681551755382, 508.7016300888514, 1852.7692022716192], 
"eval_len": [89, 54, 910, 157, 1000, 495, 236, 138, 136, 538]}

100%|█████████▉| 999999/1000000 [8:40:21<00:00, 47.85it/s]global step 1000000, trans_decision ep_re 2174.180750571007

{"global_step": 1000000, "eval_re": [982.3234463925005, 521.61999526873, 
3918.620896345675, 2920.5237038217997, 2105.3726464059555, 2569.9229658102586, 
787.2453332786209, 3234.0768354453917, 4017.167879390302, 684.9338035508354], 
"eval_len": [337, 139, 1000, 673, 611, 639, 279, 835, 1000, 179]}

100%|██████████| 1000000/1000000 [8:40:29<00:00, 32.02it/s]
