
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'ExtremeClogL1U23::markov(ord(15,1), ord(3,5,3,shift=22), 
[[124, 1], [1, 19]])'
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9996/1000000 [03:10<7:08:40, 38.49it/s]global step 10000, trans_decision ep_re 245.25070765941965

{"global_step": 10000, "eval_re": [280.0729347700473, 134.44834508904205, 
246.01984455627374, 233.42646268470605, 106.65436676346245, 295.97581665567975, 
239.40342457141858, 396.040533762756, 313.19925703552695, 207.2660907052836], 
"eval_len": [166, 143, 151, 127, 232, 190, 132, 290, 200, 120]}

  2%|▏         | 19999/1000000 [09:10<7:02:56, 38.62it/s]global step 20000, trans_decision ep_re 207.2755837349137

{"global_step": 20000, "eval_re": [241.77539204371644, 201.07160365156906, 
215.02640592082932, 174.34528545356062, 199.14529356942765, 213.12179657045738, 
212.1231128507629, 215.9952680706585, 184.83447746217277, 215.31720175598224], 
"eval_len": [138, 116, 143, 115, 120, 115, 111, 117, 103, 123]}

  3%|▎         | 29996/1000000 [14:52<6:59:21, 38.55it/s]global step 30000, trans_decision ep_re 245.75236861315125

{"global_step": 30000, "eval_re": [427.2335848304553, 214.63072569646695, 
103.02819517203247, 86.04159261709728, 275.59096968675703, 191.6936099767437, 
429.32831890019395, 626.0811218806206, 19.882307171015842, 84.01326020012954], 
"eval_len": [292, 198, 145, 155, 198, 107, 251, 358, 29, 128]}

  4%|▍         | 39997/1000000 [21:00<6:53:11, 38.72it/s]global step 40000, trans_decision ep_re 270.4440077792696

{"global_step": 40000, "eval_re": [720.8405486456367, 147.1915737348547, 
214.85439240128596, 303.7037946228513, 245.34799087186232, 169.8792841653963, 
372.23676453446905, 147.15672523173976, 217.7918334148651, 165.43717016973497], 
"eval_len": [731, 112, 148, 186, 143, 116, 216, 208, 161, 115]}

  5%|▍         | 49999/1000000 [26:43<6:48:01, 38.81it/s]global step 50000, trans_decision ep_re 141.93276798258495

{"global_step": 50000, "eval_re": [224.84250417917167, 28.875279716340096, 
351.48352141257544, 267.13684302343796, 31.499001370610934, 165.97454253997557, 
259.1641545452206, 31.517696840180296, 31.21922755028303, 27.614908648054055], 
"eval_len": [132, 42, 150, 133, 41, 101, 142, 42, 42, 41]}

  6%|▌         | 59997/1000000 [32:35<6:43:42, 38.81it/s]global step 60000, trans_decision ep_re 341.68194176718896

{"global_step": 60000, "eval_re": [365.81136394652304, 84.23009089353067, 
257.5195284915706, 201.50249551593112, 268.07230709141584, 550.180158794865, 
633.254244764703, 299.5053560614216, 17.247867634417574, 739.4960044775111], 
"eval_len": [190, 94, 211, 212, 172, 278, 290, 158, 28, 310]}

  7%|▋         | 69997/1000000 [38:30<6:41:21, 38.62it/s]global step 70000, trans_decision ep_re 421.7839867557712

{"global_step": 70000, "eval_re": [476.68273340537286, 181.29275528653056, 
538.3663847107429, 689.4610833002307, 525.2347729446109, 243.7027075908258, 
412.0621428354874, 356.6867659574078, 569.7674717811578, 224.5830497453452], 
"eval_len": [307, 105, 308, 373, 262, 139, 349, 193, 348, 166]}

  8%|▊         | 79999/1000000 [44:27<6:34:26, 38.87it/s]global step 80000, trans_decision ep_re 123.63432735130236

{"global_step": 80000, "eval_re": [316.18960308479245, 13.66685606551554, 
37.08916367243995, 280.68334346101364, 182.2404308171568, 44.08815960274287, 
206.55350898103072, 15.965376936349609, 13.886108923633435, 125.98072196834846],
"eval_len": [158, 24, 43, 132, 130, 47, 104, 27, 25, 88]}

  9%|▉         | 89996/1000000 [50:30<6:26:27, 39.25it/s]global step 90000, trans_decision ep_re 290.6474341303457

{"global_step": 90000, "eval_re": [148.7219546328555, 44.1272152946918, 
222.15634829413196, 238.88538791729138, 481.523638019644, 523.6350780829135, 
582.7914742173983, 191.61729107950742, 42.77566444087726, 430.2402893241456], 
"eval_len": [111, 49, 108, 110, 222, 384, 229, 103, 48, 223]}

 10%|▉         | 99999/1000000 [56:11<6:42:05, 37.31it/s]global step 100000, trans_decision ep_re 328.2761351087356

{"global_step": 100000, "eval_re": [164.47217116222748, 157.36013988210584, 
371.05487049874375, 488.76356118022517, 200.33402265337077, 298.9676352082863, 
326.0058349852356, 378.104539776972, 222.75304924512446, 674.9455264950642], 
"eval_len": [105, 90, 140, 223, 110, 130, 131, 208, 172, 234]}

 11%|█         | 109997/1000000 [1:02:04<6:38:20, 37.24it/s]global step 110000, trans_decision ep_re 314.69054529857647

{"global_step": 110000, "eval_re": [267.09373788556206, 68.76414375251636, 
655.6195795128297, 418.8150483813753, 498.2063864682476, 464.86388299360044, 
195.77729136986218, 383.8796829170587, 92.1312148118015, 101.75448489291125], 
"eval_len": [173, 77, 332, 221, 240, 289, 123, 239, 115, 115]}

 12%|█▏        | 119998/1000000 [1:08:10<6:32:15, 37.39it/s]global step 120000, trans_decision ep_re 363.61406801762723

{"global_step": 120000, "eval_re": [333.53246904551105, 904.1600159025821, 
88.61056792033041, 76.64093870792338, 66.0121685694683, 188.93308189006802, 
1438.288510629609, 290.06239387799263, 160.8855022825172, 89.01503135027008], 
"eval_len": [240, 364, 101, 89, 69, 196, 942, 144, 88, 116]}

 13%|█▎        | 129997/1000000 [1:13:54<6:37:55, 36.44it/s]global step 130000, trans_decision ep_re 100.54840898602674

{"global_step": 130000, "eval_re": [37.33561024994649, 146.9336642175413, 
215.32357009613307, 112.65926741529515, 15.767049132430698, 127.41852977906146, 
133.4298695377462, 135.70171460341717, 40.07024526131766, 40.84456956737823], 
"eval_len": [46, 77, 114, 72, 26, 74, 75, 78, 46, 45]}

 14%|█▍        | 139999/1000000 [1:19:45<6:13:03, 38.42it/s]global step 140000, trans_decision ep_re 159.7819085338684

{"global_step": 140000, "eval_re": [62.55242564264497, 66.35043469598357, 
150.6821081359661, 49.05670618790553, 152.55303776347145, 61.89874997665612, 
287.3768541858687, 81.02245435507697, 437.43614862821386, 248.8901657668969], 
"eval_len": [63, 74, 89, 49, 111, 63, 166, 74, 216, 161]}

 15%|█▍        | 149998/1000000 [1:25:36<6:04:02, 38.92it/s]global step 150000, trans_decision ep_re 222.03657906029645

{"global_step": 150000, "eval_re": [99.92691553618818, 194.9713299375649, 
234.77965647262377, 342.29950804163224, 39.854071102799765, 43.46003814041154, 
287.9456097947321, 647.0836646216219, 313.6091062376982, 16.435890717692022], 
"eval_len": [105, 124, 150, 189, 44, 46, 172, 237, 173, 26]}

 16%|█▌        | 159997/1000000 [1:31:40<6:00:25, 38.84it/s]global step 160000, trans_decision ep_re 593.6969628041179

{"global_step": 160000, "eval_re": [517.1538692628683, 1187.661567088245, 
566.4596437873623, 837.7161316107535, 628.4945650065803, 138.012697861066, 
659.2213036515889, 205.04541269635308, 520.4162788286471, 676.7881582477152], 
"eval_len": [208, 516, 276, 281, 277, 79, 232, 177, 279, 301]}

 17%|█▋        | 169996/1000000 [1:37:25<5:57:05, 38.74it/s]global step 170000, trans_decision ep_re 203.61713988994842

{"global_step": 170000, "eval_re": [203.43735618045773, 33.34559519429995, 
86.9004079431129, 11.82918366541134, 37.64340567340978, 464.05229997428825, 
529.3323686626834, 389.2564577451721, 32.630123589111825, 247.74420027153653], 
"eval_len": [109, 41, 70, 23, 43, 180, 186, 226, 42, 131]}

 18%|█▊        | 179996/1000000 [1:43:30<5:52:06, 38.81it/s]global step 180000, trans_decision ep_re 391.7626994456883

{"global_step": 180000, "eval_re": [426.45935503122365, 452.74818779801893, 
273.5615213430177, 400.3281128798566, 524.6902894500955, 121.55575946388407, 
680.6861423562412, 913.9764156915568, 108.37005329077353, 15.251157152214665], 
"eval_len": [192, 252, 173, 187, 280, 100, 251, 359, 112, 26]}

 19%|█▉        | 189999/1000000 [1:49:12<5:49:43, 38.60it/s]global step 190000, trans_decision ep_re 520.2859279856688

{"global_step": 190000, "eval_re": [547.0709364898405, 466.5529780364695, 
352.06769133639403, 442.47054354503666, 724.3138815819918, 135.08562093516525, 
696.5162159668655, 624.5939712569733, 238.6082440125143, 975.5791966954367], 
"eval_len": [267, 179, 175, 191, 260, 102, 280, 246, 124, 378]}

 20%|█▉        | 199997/1000000 [1:55:20<5:43:24, 38.83it/s]global step 200000, trans_decision ep_re 205.46449358274123

{"global_step": 200000, "eval_re": [27.141998271780025, 245.51956483444096, 
30.116578952954065, 131.53653972472137, 178.4122307129096, 40.15580876054188, 
15.789480621402971, 14.187718226277967, 1042.594783065239, 329.1902326571443], 
"eval_len": [42, 139, 43, 89, 108, 45, 26, 25, 412, 165]}

 21%|██        | 209996/1000000 [2:01:01<5:39:24, 38.79it/s]global step 210000, trans_decision ep_re 220.35517013109788

{"global_step": 210000, "eval_re": [180.99688476173083, 339.6245487185793, 
43.75206448461654, 25.773334725405423, 355.51042991071813, 226.2460641821824, 
45.641254942938964, 641.150787472561, 184.2134889984687, 160.64284311377767], 
"eval_len": [149, 171, 55, 41, 160, 127, 55, 247, 151, 106]}

 22%|██▏       | 219996/1000000 [2:06:54<5:34:48, 38.83it/s]global step 220000, trans_decision ep_re 262.3948788035622

{"global_step": 220000, "eval_re": [37.53530035721619, 554.602918216566, 
40.82709455433627, 15.549989274050064, 959.1829345031812, 62.770346862385516, 
37.739091634073034, 758.8371919996479, 34.71510794382791, 122.18881269033832], 
"eval_len": [43, 221, 44, 26, 291, 65, 43, 273, 43, 97]}

 23%|██▎       | 229999/1000000 [2:12:46<5:33:41, 38.46it/s]global step 230000, trans_decision ep_re 138.62655157562088

{"global_step": 230000, "eval_re": [34.12308066004113, 82.5662512554657, 
92.0862751858021, 948.1873209866063, 31.49248414243909, 20.878311681596152, 
72.25923926658554, 31.850408940883312, 34.51747343807182, 38.30467019871757], 
"eval_len": [42, 68, 79, 324, 43, 40, 70, 43, 43, 43]}

 24%|██▍       | 239997/1000000 [2:18:37<5:30:26, 38.33it/s]global step 240000, trans_decision ep_re 216.76440612818823

{"global_step": 240000, "eval_re": [181.21768700626853, 147.40210924367594, 
202.28656728472245, 307.137493669616, 32.22384279661958, 105.63996568394735, 
309.17930427947596, 297.06656506810384, 114.43572214580779, 471.05480410364464],
"eval_len": [91, 98, 126, 179, 44, 83, 150, 157, 81, 233]}

 25%|██▍       | 249999/1000000 [2:24:32<5:36:18, 37.17it/s]global step 250000, trans_decision ep_re 173.40540206804926

{"global_step": 250000, "eval_re": [36.687017167420194, 14.968195094276698, 
195.74537392073765, 106.88088118547894, 155.0013075824481, 36.44009931542957, 
142.65423207331753, 347.3523315480302, 115.06983277490363, 583.25475001845], 
"eval_len": [45, 26, 174, 83, 89, 44, 101, 155, 113, 219]}

 26%|██▌       | 259997/1000000 [2:30:23<5:30:31, 37.31it/s]global step 260000, trans_decision ep_re 314.8983996717232

{"global_step": 260000, "eval_re": [139.53460040288843, 123.83810628615956, 
1556.7979683229373, 31.24280438540031, 204.06082426591732, 389.43835867483836, 
251.62529523193487, 38.545848236082385, 33.445128573955614, 380.4550623371177], 
"eval_len": [122, 119, 535, 42, 110, 224, 148, 43, 43, 170]}

 27%|██▋       | 269997/1000000 [2:36:18<5:24:54, 37.45it/s]global step 270000, trans_decision ep_re 97.05794245310003

{"global_step": 270000, "eval_re": [114.0467861697595, 71.08172020055122, 
34.364088113037255, 149.3612189254584, 37.4550343587994, 33.57000273663008, 
31.31478544526822, 34.585945417597756, 288.5777446651641, 176.22209849873425], 
"eval_len": [77, 68, 42, 100, 44, 42, 41, 43, 131, 110]}

 28%|██▊       | 279999/1000000 [2:42:20<5:10:05, 38.70it/s]global step 280000, trans_decision ep_re 211.60919280765287

{"global_step": 280000, "eval_re": [622.7491055540264, 74.19935190984141, 
49.26661342616665, 37.1621592576157, 494.8247835009688, 30.633978911774733, 
465.0661668856456, 216.87950158712596, 87.82359677269733, 37.48667027066618], 
"eval_len": [223, 71, 58, 43, 205, 41, 215, 148, 73, 43]}

 29%|██▉       | 289996/1000000 [2:48:01<5:07:32, 38.48it/s]global step 290000, trans_decision ep_re 136.17844193254356

{"global_step": 290000, "eval_re": [118.78199725942393, 138.12415553490652, 
249.6915088994478, 176.09061349372476, 150.62217469327322, 104.98146641780319, 
126.66846686194427, 34.85805337393434, 120.27303278029915, 141.69295001067854], 
"eval_len": [113, 108, 156, 169, 112, 110, 116, 43, 89, 118]}

 30%|██▉       | 299999/1000000 [2:53:54<5:01:58, 38.64it/s]global step 300000, trans_decision ep_re 209.185699332367

{"global_step": 300000, "eval_re": [901.631589822276, 349.0739749184541, 
36.04390516457383, 35.09911471253047, 327.3752774400563, 32.82610736569417, 
54.814931346101844, 288.6102139873133, 30.243569532021237, 36.13830903464867], 
"eval_len": [323, 149, 42, 42, 149, 42, 55, 143, 40, 42]}

 31%|███       | 309998/1000000 [2:59:45<4:56:13, 38.82it/s]global step 310000, trans_decision ep_re 151.4657790517111

{"global_step": 310000, "eval_re": [94.32051461725464, 295.9171171828762, 
67.43974903555407, 111.62555014161539, 134.27890225091988, 47.022796599753555, 
617.7330542249078, 40.02649448187756, 91.29299947101617, 15.000612511336005], 
"eval_len": [80, 141, 64, 103, 74, 54, 198, 46, 72, 26]}

 32%|███▏      | 319997/1000000 [3:05:36<4:52:15, 38.78it/s]global step 320000, trans_decision ep_re 226.85595147637179

{"global_step": 320000, "eval_re": [161.76525973124606, 329.86247356619145, 
115.7431408740003, 134.45494103518539, 118.49959692571962, 263.97668017929385, 
52.88655939224284, 677.8803253094501, 162.01303001380498, 251.4775077365832], 
"eval_len": [115, 322, 84, 111, 95, 117, 58, 282, 105, 112]}

 33%|███▎      | 329997/1000000 [3:11:40<4:48:22, 38.72it/s]global step 330000, trans_decision ep_re 171.3759683365484

{"global_step": 330000, "eval_re": [292.0857403748316, 315.25916782871377, 
393.923900425771, 36.57958808839235, 50.117509416059285, 48.1467959985967, 
46.57696280071333, 40.11860406286969, 17.037763545666845, 473.9136508238693], 
"eval_len": [141, 154, 174, 44, 45, 52, 51, 44, 27, 196]}

 34%|███▍      | 339997/1000000 [3:17:21<4:40:55, 39.16it/s]global step 340000, trans_decision ep_re 245.7596279858912

{"global_step": 340000, "eval_re": [657.9983789501113, 578.9372699042087, 
60.63329888745843, 51.4158437706041, 35.057354796824924, 31.19367506718828, 
254.1077006399132, 40.07049462335231, 608.9296380987971, 139.252625120454], 
"eval_len": [233, 200, 64, 59, 42, 41, 161, 44, 229, 92]}

 35%|███▍      | 349998/1000000 [3:23:13<4:51:17, 37.19it/s]global step 350000, trans_decision ep_re 262.2602155703618

{"global_step": 350000, "eval_re": [38.241179607803076, 255.49761405167024, 
50.15149442572079, 1013.2411744655016, 40.17472444758231, 31.10158716439009, 
553.6013768065922, 41.4897295526384, 564.3761919442538, 34.72708323746592], 
"eval_len": [43, 161, 54, 361, 45, 42, 245, 44, 231, 41]}

 36%|███▌      | 359999/1000000 [3:29:06<4:50:56, 36.66it/s]global step 360000, trans_decision ep_re 315.5411124945824

{"global_step": 360000, "eval_re": [142.97798824857207, 200.14267778351382, 
38.72052596615862, 34.96168048445489, 410.68920037991217, 87.14260576994646, 
380.94514182874343, 40.093386243940245, 35.181182384618545, 1784.556735855964], 
"eval_len": [101, 115, 43, 42, 199, 74, 158, 43, 43, 494]}

 37%|███▋      | 369998/1000000 [3:35:10<4:40:42, 37.41it/s]global step 370000, trans_decision ep_re 346.2465820294857

{"global_step": 370000, "eval_re": [315.7680669184042, 343.3592373766616, 
188.87453871787835, 395.00612836406367, 312.113407750866, 968.6065691580576, 
38.144814562637656, 383.7826454554993, 17.091489412525515, 499.7189225782635], 
"eval_len": [140, 158, 132, 217, 140, 311, 45, 193, 28, 231]}

 38%|███▊      | 379997/1000000 [3:40:52<4:35:54, 37.45it/s]global step 380000, trans_decision ep_re 400.3517132949182

{"global_step": 380000, "eval_re": [495.36257116720014, 799.1950131549999, 
271.0622275761757, 581.4386135173037, 121.49609156727385, 199.40141702980014, 
362.3180816728999, 15.612494665590695, 34.02531883012178, 1123.6053037678166], 
"eval_len": [234, 267, 123, 261, 105, 117, 151, 26, 43, 387]}

 39%|███▉      | 389997/1000000 [3:46:46<4:31:52, 37.39it/s]global step 390000, trans_decision ep_re 77.77606257709073

{"global_step": 390000, "eval_re": [42.77382464092547, 142.4595706916254, 
123.69420492517568, 36.39002080566721, 34.11404611479813, 276.3724584657865, 
28.727771460432873, 17.425032976694247, 32.064100220724825, 43.73959546907695], 
"eval_len": [45, 85, 89, 43, 42, 142, 41, 27, 42, 44]}

 40%|███▉      | 399999/1000000 [3:52:50<4:17:56, 38.77it/s]global step 400000, trans_decision ep_re 342.5753681668903

{"global_step": 400000, "eval_re": [378.696167886253, 37.94028572132938, 
34.42068942673366, 1017.2635062247493, 488.7700242947282, 265.86759359002946, 
372.1665921503447, 663.865142044075, 130.4008514192376, 36.36282891142256], 
"eval_len": [171, 42, 43, 341, 244, 165, 176, 286, 118, 42]}

 41%|████      | 409997/1000000 [3:58:31<4:14:17, 38.67it/s]global step 410000, trans_decision ep_re 126.92219409995548

{"global_step": 410000, "eval_re": [39.46725456241161, 32.08598810903353, 
86.44829325928238, 66.98956651514854, 390.30830077565327, 51.30460372985989, 
122.03210837296768, 121.66376508395447, 122.20115837832024, 236.72090221292314],
"eval_len": [43, 42, 74, 70, 199, 53, 91, 86, 85, 140]}

 42%|████▏     | 419997/1000000 [4:04:22<4:09:19, 38.77it/s]global step 420000, trans_decision ep_re 248.9254120696885

{"global_step": 420000, "eval_re": [305.0972501733826, 60.65794961329227, 
159.6437266507646, 623.9511576723204, 81.58675859920392, 172.95645553867263, 
586.7855425311141, 263.6560959120638, 91.21688064755261, 143.70230335851818], 
"eval_len": [149, 70, 100, 224, 226, 226, 195, 225, 71, 100]}

 43%|████▎     | 429999/1000000 [4:10:15<4:05:02, 38.77it/s]global step 430000, trans_decision ep_re 443.81341775253924

{"global_step": 430000, "eval_re": [137.34094614271163, 14.998453934363097, 
79.78528045387067, 1289.7749592311325, 137.02135528359648, 430.6118208722931, 
68.49501831100427, 653.9592243329528, 837.0914909124019, 789.0556280510656], 
"eval_len": [91, 27, 74, 403, 97, 164, 75, 234, 244, 241]}

 44%|████▍     | 439997/1000000 [4:16:20<4:01:46, 38.60it/s]global step 440000, trans_decision ep_re 492.17232483491443

{"global_step": 440000, "eval_re": [193.75241631894784, 1024.863255625807, 
57.56281765660091, 1157.6885884701144, 150.08381566252206, 997.0877763785089, 
59.636960910286895, 16.740646104567002, 88.48420208416363, 1175.822769137625], 
"eval_len": [136, 371, 63, 379, 100, 312, 57, 28, 80, 346]}

 45%|████▍     | 449997/1000000 [4:22:03<3:56:42, 38.73it/s]global step 450000, trans_decision ep_re 131.10491271803397

{"global_step": 450000, "eval_re": [169.2594381046258, 129.0536392089764, 
19.771029956857006, 112.30786041289295, 40.8690187416379, 144.9222327278119, 
388.6631623252009, 134.46221249122735, 142.75120116737227, 28.98933204373719], 
"eval_len": [105, 89, 29, 86, 49, 84, 177, 97, 93, 43]}

 46%|████▌     | 459996/1000000 [4:27:53<3:52:53, 38.65it/s]global step 460000, trans_decision ep_re 481.55582854325905

{"global_step": 460000, "eval_re": [206.202976860633, 229.6766214337672, 
787.0939752195746, 448.6836168933193, 242.73378527334353, 246.9896880515821, 
1356.1247612648356, 265.74661636893495, 720.8322579503307, 311.4739861162687], 
"eval_len": [131, 139, 262, 199, 112, 138, 340, 127, 226, 150]}

 47%|████▋     | 469997/1000000 [4:34:00<3:48:44, 38.62it/s]global step 470000, trans_decision ep_re 292.85675371553486

{"global_step": 470000, "eval_re": [219.53328080382906, 701.7635016587091, 
195.45517361401838, 153.852056092181, 66.64483522080171, 229.89870658700298, 
15.290577978004253, 71.25367184281869, 434.2181782925476, 840.6575550654359], 
"eval_len": [154, 338, 114, 158, 63, 158, 26, 72, 158, 302]}

 48%|████▊     | 479999/1000000 [4:39:50<3:43:36, 38.76it/s]global step 480000, trans_decision ep_re 301.13914087434307

{"global_step": 480000, "eval_re": [282.0744708567434, 15.674830805802127, 
535.6456270925427, 585.0841080303737, 604.5018068482026, 528.9758241737723, 
39.06925234870543, 324.275122050025, 59.262173422754, 36.828193114509354], 
"eval_len": [124, 26, 211, 200, 264, 185, 46, 191, 62, 44]}

 49%|████▉     | 489997/1000000 [4:45:30<3:47:54, 37.30it/s]global step 490000, trans_decision ep_re 97.99685261497083

{"global_step": 490000, "eval_re": [96.12171872746794, 107.87353256778826, 
115.15345843018233, 123.26974109513249, 16.65265914353737, 71.23281418650248, 
112.37885865280941, 103.28876012796756, 105.02346150744552, 128.9735217108751], 
"eval_len": [69, 79, 89, 72, 27, 61, 74, 85, 72, 77]}

 50%|████▉     | 499997/1000000 [4:51:30<3:40:48, 37.74it/s]global step 500000, trans_decision ep_re 207.98690849429164

{"global_step": 500000, "eval_re": [63.653661696734645, 365.06654769124367, 
94.09131065904471, 724.1260629176963, 98.7011335467649, 304.4210886350153, 
219.0336102416635, 37.474493961995414, 99.0334309885632, 74.26774460419466], 
"eval_len": [66, 156, 89, 238, 77, 143, 118, 45, 79, 74]}

 51%|█████     | 509996/1000000 [4:57:10<3:30:44, 38.75it/s]global step 510000, trans_decision ep_re 194.47271276886985

{"global_step": 510000, "eval_re": [35.1796457665888, 35.13785631591852, 
38.1006021004368, 287.9170050569469, 34.1292895409366, 37.75247948066484, 
36.30350041986155, 1336.2335530855555, 65.50575777788012, 38.467438143909064], 
"eval_len": [45, 45, 46, 134, 48, 47, 43, 387, 63, 46]}

 52%|█████▏    | 519996/1000000 [5:03:01<3:26:02, 38.83it/s]global step 520000, trans_decision ep_re 348.5569831853583

{"global_step": 520000, "eval_re": [77.70788477469677, 56.22503187588033, 
60.0826527059086, 1070.7765208276514, 160.58782545853364, 781.6906764041452, 
117.36890854850867, 42.17394149215388, 303.1080378420589, 815.8483519240458], 
"eval_len": [70, 61, 60, 285, 111, 254, 97, 44, 161, 280]}

 53%|█████▎    | 529999/1000000 [5:08:54<3:21:38, 38.85it/s]global step 530000, trans_decision ep_re 547.945378539261

{"global_step": 530000, "eval_re": [908.8752343855028, 447.04775729369936, 
565.0929498956642, 737.338784217682, 385.9042716760498, 731.1152451902822, 
33.15286217304835, 716.8281344476173, 480.81902787957813, 473.2795182334856], 
"eval_len": [264, 179, 204, 266, 149, 233, 42, 211, 191, 187]}

 54%|█████▍    | 539996/1000000 [5:14:50<3:17:21, 38.85it/s]global step 540000, trans_decision ep_re 43.44198032866096

{"global_step": 540000, "eval_re": [38.49900855284719, 47.496729277649344, 
15.041401796710286, 66.87183880038042, 35.587153638789104, 38.5914050448293, 
38.6225230792271, 44.47464298433052, 44.128253961635245, 65.10684615021117], 
"eval_len": [47, 49, 28, 70, 47, 47, 46, 49, 48, 57]}

 55%|█████▍    | 549997/1000000 [5:20:41<3:14:07, 38.64it/s]global step 550000, trans_decision ep_re 417.1310378058235

{"global_step": 550000, "eval_re": [62.69964528818552, 309.31148206255835, 
262.7468979189987, 77.76293225318238, 586.2358571746328, 62.052533129105626, 
287.48748866360773, 111.22385335819003, 850.4428813906918, 1561.3468068190825], 
"eval_len": [63, 137, 144, 65, 271, 63, 139, 88, 308, 426]}

 56%|█████▌    | 559998/1000000 [5:26:34<3:09:05, 38.78it/s]global step 560000, trans_decision ep_re 378.9668027735798

{"global_step": 560000, "eval_re": [809.763379789908, 73.82719941169009, 
78.63191292017324, 1262.3764609297314, 197.0696738246789, 62.15761582470791, 
49.2472639285401, 510.3471914673593, 56.803341596187835, 689.4439880428212], 
"eval_len": [264, 69, 71, 334, 110, 75, 48, 189, 58, 245]}

 57%|█████▋    | 569998/1000000 [5:32:28<3:04:46, 38.79it/s]global step 570000, trans_decision ep_re 176.33031855426947

{"global_step": 570000, "eval_re": [143.44455500606855, 309.8784928746568, 
13.023005857640447, 98.09727804055753, 155.3805507358662, 151.98389298463132, 
30.691999701276863, 150.33851192733482, 171.14815292077137, 539.3167454938908], 
"eval_len": [73, 152, 25, 165, 101, 81, 43, 84, 106, 241]}

 58%|█████▊    | 579996/1000000 [5:38:30<3:00:06, 38.87it/s]global step 580000, trans_decision ep_re 349.1673653315616

{"global_step": 580000, "eval_re": [34.46235982382532, 595.2071987839677, 
563.1052972432163, 703.8532567573219, 176.20136942946948, 15.112285679280975, 
373.83878368987735, 126.14169257029765, 297.42018649570974, 606.3312228426494], 
"eval_len": [43, 199, 205, 228, 122, 26, 145, 92, 137, 201]}

 59%|█████▉    | 589997/1000000 [5:44:11<2:55:15, 38.99it/s]global step 590000, trans_decision ep_re 212.4098020255688

{"global_step": 590000, "eval_re": [121.14436608570625, 45.38650027910005, 
50.719884004594135, 1424.1282494491843, 43.97406058574195, 36.92369241545328, 
121.04027270675734, 44.852378705673246, 97.07545244669882, 138.85316357677877], 
"eval_len": [78, 46, 48, 447, 46, 43, 94, 45, 72, 92]}

 60%|█████▉    | 599996/1000000 [5:50:02<2:50:44, 39.05it/s]global step 600000, trans_decision ep_re 526.3318801071654

{"global_step": 600000, "eval_re": [707.3637952355824, 1221.5953131870594, 
354.8735212794438, 1315.5310825437862, 121.96979238928265, 239.915450146582, 
119.53006886883252, 213.0694426158917, 765.942280991206, 203.5280538139881], 
"eval_len": [245, 351, 169, 410, 94, 150, 92, 120, 277, 113]}

 61%|██████    | 609998/1000000 [5:55:56<2:46:50, 38.96it/s]global step 610000, trans_decision ep_re 106.42863508575097

{"global_step": 610000, "eval_re": [43.341630667389076, 509.3706888398941, 
112.1050649768341, 44.898121503900434, 64.70862850270206, 97.85584317165565, 
69.687577362569, 71.1226156559472, 15.975360339691838, 35.22081983692627], 
"eval_len": [46, 173, 81, 45, 64, 97, 72, 81, 27, 41]}

 62%|██████▏   | 619998/1000000 [6:02:00<2:48:04, 37.68it/s]global step 620000, trans_decision ep_re 640.7381026057436

{"global_step": 620000, "eval_re": [78.22719940777769, 975.7627933150959, 
66.14968568466863, 126.64546590230606, 1918.1496527469662, 761.3077376501883, 
349.89061019302756, 2031.0762358937825, 67.6027309589122, 32.568914304711036], 
"eval_len": [75, 271, 63, 79, 487, 334, 312, 502, 67, 42]}

 63%|██████▎   | 629996/1000000 [6:07:50<2:44:23, 37.51it/s]global step 630000, trans_decision ep_re 174.6959643609022

{"global_step": 630000, "eval_re": [31.485150130052006, 34.49184008039706, 
495.69894170799927, 36.307858972937, 41.09697304181606, 42.05002461225611, 
711.3493291156337, 41.26543836120556, 284.89952110571573, 28.314566481009553], 
"eval_len": [40, 41, 177, 41, 42, 41, 237, 44, 240, 40]}

 64%|██████▍   | 639998/1000000 [6:13:40<2:37:58, 37.98it/s]global step 640000, trans_decision ep_re 459.9544533103046

{"global_step": 640000, "eval_re": [94.59523082767267, 661.594865066048, 
122.6315978433779, 245.66550828307308, 59.860069453474274, 1039.8215635354177, 
1712.3884597801461, 546.2145412619408, 45.139944088611834, 71.632752963284], 
"eval_len": [68, 209, 90, 124, 63, 275, 414, 177, 45, 68]}

 65%|██████▍   | 649998/1000000 [6:19:21<2:30:21, 38.80it/s]global step 650000, trans_decision ep_re 338.81675013792017

{"global_step": 650000, "eval_re": [645.5659110460361, 71.91036609457011, 
97.5582142536037, 2233.435297387622, 78.78663888129043, 12.12879169526444, 
54.50087479517945, 35.39818434969377, 93.47772017818659, 65.40550269775513], 
"eval_len": [232, 73, 85, 523, 72, 22, 68, 42, 75, 67]}

 66%|██████▌   | 659999/1000000 [6:25:14<2:25:20, 38.99it/s]global step 660000, trans_decision ep_re 259.3083584243113

{"global_step": 660000, "eval_re": [146.5314547703232, 32.19850530887935, 
144.00272376798077, 978.2904503333971, 153.90861172267242, 16.018585079130542, 
56.02450498745465, 473.1975889806981, 328.78489493396467, 264.12626435861193], 
"eval_len": [92, 41, 95, 432, 166, 26, 85, 182, 141, 150]}

 67%|██████▋   | 669998/1000000 [6:31:05<2:21:40, 38.82it/s]global step 670000, trans_decision ep_re 418.570947970202

{"global_step": 670000, "eval_re": [202.30573943673306, 14.961004941962356, 
1125.5302735406267, 43.32359957949582, 449.2373452061564, 610.6236981648311, 
174.1840275786649, 928.9483706036127, 248.15778301408702, 388.4376376358502], 
"eval_len": [100, 29, 315, 44, 158, 212, 115, 260, 152, 225]}

 68%|██████▊   | 679999/1000000 [6:36:57<2:16:48, 38.98it/s]global step 680000, trans_decision ep_re 115.07392180733765

{"global_step": 680000, "eval_re": [659.3101560273307, 61.111044408613324, 
75.06966649588699, 14.172156588739542, 57.46388362488276, 43.884473643419526, 
73.76922328024799, 34.959117364917894, 47.4123810316921, 83.58711560764587], 
"eval_len": [206, 68, 77, 26, 59, 45, 68, 42, 46, 74]}

 69%|██████▉   | 689996/1000000 [6:42:46<2:13:23, 38.73it/s]global step 690000, trans_decision ep_re 105.06642058075258

{"global_step": 690000, "eval_re": [54.16795100291416, 53.0938034955029, 
289.7815808399273, 112.9607922165036, 223.15320830341093, 53.41840469443157, 
60.697552951608, 40.296014015632586, 61.07621870856218, 102.01867957903265], 
"eval_len": [57, 57, 128, 83, 114, 62, 63, 45, 60, 78]}

 70%|██████▉   | 699996/1000000 [6:48:35<2:08:37, 38.87it/s]global step 700000, trans_decision ep_re 193.70217944098823

{"global_step": 700000, "eval_re": [36.070055660613626, 16.551847205676797, 
645.9902855648174, 39.30855171070852, 661.0290539172322, 32.64206219423346, 
79.09111745818848, 81.62875512829034, 304.3342569156382, 40.375808654483386], 
"eval_len": [45, 27, 221, 44, 247, 42, 67, 73, 153, 46]}

 71%|███████   | 709997/1000000 [6:54:25<2:09:20, 37.37it/s]global step 710000, trans_decision ep_re 526.1490859721819

{"global_step": 710000, "eval_re": [295.05230611410843, 23.81270969296845, 
280.4102543599454, 13.832017325678041, 1355.4137645363253, 542.0291814058851, 
1012.4828680585965, 943.7226249016151, 37.4220490915002, 757.3130842351966], 
"eval_len": [149, 39, 125, 25, 324, 199, 295, 289, 46, 235]}

 72%|███████▏  | 719999/1000000 [7:00:17<2:04:05, 37.61it/s]global step 720000, trans_decision ep_re 254.93560454969779

{"global_step": 720000, "eval_re": [30.347147137882697, 387.54456962891027, 
354.88333361545597, 329.9973021003596, 529.623996439835, 16.138924089098794, 
139.25130372468345, 58.92296274598281, 552.3392611074854, 150.30724490728372], 
"eval_len": [42, 183, 155, 152, 178, 30, 99, 62, 182, 89]}

 73%|███████▎  | 729996/1000000 [7:06:20<1:59:31, 37.65it/s]global step 730000, trans_decision ep_re 441.04510191258396

{"global_step": 730000, "eval_re": [495.0553439656909, 128.5632153888781, 
52.6176479783782, 1380.7688931736855, 701.6083079846269, 33.97274377262957, 
554.5983810819357, 26.634708126183792, 32.798327890784506, 1003.8334497630467], 
"eval_len": [187, 92, 67, 359, 237, 41, 197, 41, 41, 311]}

 74%|███████▍  | 739997/1000000 [7:12:10<1:54:56, 37.70it/s]global step 740000, trans_decision ep_re 71.90294621548313

{"global_step": 740000, "eval_re": [22.02676199339055, 122.89390179731758, 
158.71164766125068, 36.48128191644675, 167.58921552218712, 35.386768056984195, 
16.518751130843967, 60.86887569192435, 37.51650442503055, 61.0357539594554], 
"eval_len": [33, 106, 85, 40, 105, 44, 28, 62, 45, 60]}

 75%|███████▍  | 749998/1000000 [7:17:48<1:46:58, 38.95it/s]global step 750000, trans_decision ep_re 169.59962372571016

{"global_step": 750000, "eval_re": [308.6559066839161, 43.587909867180095, 
533.5053569706597, 111.77843464392147, 255.5318787121248, 132.16186467092294, 
37.054982302353274, 31.81953082354532, 115.64358497336954, 126.25678760910843], 
"eval_len": [124, 45, 202, 90, 124, 79, 44, 41, 78, 75]}

 76%|███████▌  | 759998/1000000 [7:23:39<1:42:42, 38.95it/s]global step 760000, trans_decision ep_re 62.844212843515606

{"global_step": 760000, "eval_re": [95.61829974273007, 40.846580168326334, 
27.009601821505065, 28.99370725737898, 42.67423749360315, 47.05328247714409, 
44.668497223702744, 122.25404886810777, 125.32640043699928, 53.997472945658544],
"eval_len": [86, 48, 39, 40, 47, 47, 45, 88, 80, 61]}

 77%|███████▋  | 769997/1000000 [7:29:40<1:39:00, 38.72it/s]global step 770000, trans_decision ep_re 431.53846510985005

{"global_step": 770000, "eval_re": [762.8904246271894, 259.1511441247011, 
596.2850197818506, 38.4199995528302, 1043.2568153264642, 423.5598827002178, 
82.5291820857704, 892.1223122116785, 67.00833875870728, 150.1615319290914], 
"eval_len": [221, 114, 198, 46, 274, 156, 67, 251, 61, 83]}

 78%|███████▊  | 779997/1000000 [7:35:19<1:34:20, 38.87it/s]global step 780000, trans_decision ep_re 151.8502636320449

{"global_step": 780000, "eval_re": [29.29542025727772, 424.356370830214, 
36.554304615982076, 34.279847537739194, 38.06448289646583, 37.00388809093921, 
70.96239481193614, 40.66617070148493, 36.84048928310797, 770.479267295302], 
"eval_len": [41, 170, 43, 42, 44, 42, 64, 47, 41, 227]}

 79%|███████▉  | 789997/1000000 [7:41:08<1:30:15, 38.78it/s]global step 790000, trans_decision ep_re 220.88448528816988

{"global_step": 790000, "eval_re": [36.02125589847753, 1084.7437709605313, 
14.329836209650855, 30.330622444732043, 36.06232937760144, 32.11603225673091, 
42.95929004696423, 836.6560817396024, 41.76605510823198, 53.85957883917623], 
"eval_len": [43, 332, 27, 41, 42, 42, 46, 261, 44, 64]}

 80%|███████▉  | 799999/1000000 [7:46:57<1:28:41, 37.58it/s]global step 800000, trans_decision ep_re 236.14787945827842

{"global_step": 800000, "eval_re": [42.44641123727311, 134.388374226844, 
48.504897283858476, 83.35459332187622, 367.06285567130317, 714.9414907021917, 
61.95219059606014, 523.3796986279488, 350.6649075835786, 34.78337533185007], 
"eval_len": [45, 79, 49, 69, 179, 236, 69, 218, 168, 43]}

 81%|████████  | 809999/1000000 [7:52:47<1:24:31, 37.47it/s]global step 810000, trans_decision ep_re 263.5220826507766

{"global_step": 810000, "eval_re": [39.55250882165491, 975.1802120306119, 
63.72160092859149, 42.27689302627006, 42.48264959051194, 496.72237259533466, 
369.19448474783485, 360.43299982664854, 31.805774196428484, 213.8513307438793], 
"eval_len": [42, 275, 64, 45, 45, 200, 163, 154, 40, 117]}

 82%|████████▏ | 819996/1000000 [7:58:50<1:19:12, 37.87it/s]global step 820000, trans_decision ep_re 846.8813324298842

{"global_step": 820000, "eval_re": [1859.7369082164232, 962.5483768791731, 
221.86794354071833, 28.042432140411638, 527.1241649187762, 40.14004010144837, 
2343.39224698399, 1726.653350979854, 36.277936069930554, 723.0299244681163], 
"eval_len": [454, 272, 115, 41, 183, 43, 556, 410, 40, 231]}

 83%|████████▎ | 829996/1000000 [8:04:32<1:15:01, 37.77it/s]global step 830000, trans_decision ep_re 311.2488448936724

{"global_step": 830000, "eval_re": [183.71247356749774, 13.442877904559806, 
44.35688068074316, 653.4671044466767, 134.4285947725836, 36.54838798102577, 
295.86169794467344, 174.95779803383067, 1537.972956893285, 37.73967671184799], 
"eval_len": [88, 24, 48, 212, 78, 41, 148, 85, 405, 44]}

 84%|████████▍ | 839996/1000000 [8:10:23<1:08:25, 38.97it/s]global step 840000, trans_decision ep_re 295.53031033991147

{"global_step": 840000, "eval_re": [42.26314793317339, 12.553955018558181, 
187.37413083536833, 704.8974198927245, 444.6937441704622, 665.5814884607329, 
17.962155943263177, 16.877286574629395, 156.41128054746974, 706.688494022733], 
"eval_len": [47, 23, 90, 217, 186, 272, 26, 28, 81, 223]}

 85%|████████▍ | 849998/1000000 [8:16:13<1:04:12, 38.93it/s]global step 850000, trans_decision ep_re 448.0411830393634

{"global_step": 850000, "eval_re": [282.57950236916474, 145.29732410572183, 
1795.8967930257747, 916.4618830761095, 153.97246632122395, 369.45238419417524, 
46.7177701711394, 37.53442162793657, 689.2501649547852, 43.249120547603574], 
"eval_len": [146, 81, 412, 267, 81, 204, 45, 41, 205, 44]}

 86%|████████▌ | 859997/1000000 [8:22:04<1:00:04, 38.84it/s]global step 860000, trans_decision ep_re 118.52526445445206

{"global_step": 860000, "eval_re": [215.6690329621748, 342.3881658032189, 
39.05275962137675, 94.27613647579713, 94.6940500595108, 12.430728959232967, 
44.60112607418161, 46.15382753017172, 222.3347925738262, 73.6520244850298], 
"eval_len": [114, 163, 43, 74, 98, 25, 49, 44, 108, 62]}

 87%|████████▋ | 869997/1000000 [8:27:53<55:37, 38.96it/s]global step 870000, trans_decision ep_re 49.89988162913991

{"global_step": 870000, "eval_re": [32.85928112699224, 13.440567411433378, 
72.850650719251, 35.9182058416588, 43.27585048829151, 58.964233067999686, 
38.319904132278964, 88.17387549526511, 45.10479392083304, 70.0914540873954], 
"eval_len": [40, 25, 64, 40, 44, 74, 42, 75, 46, 63]}

 88%|████████▊ | 879998/1000000 [8:33:41<51:38, 38.73it/s]global step 880000, trans_decision ep_re 86.90273365114024

{"global_step": 880000, "eval_re": [34.970232668610144, 40.78928475385976, 
35.87432658151549, 37.69960063405132, 39.38576984356639, 37.47131988174055, 
525.0592542221154, 40.24516395217936, 37.97149257784946, 39.56089139591449], 
"eval_len": [39, 47, 43, 40, 41, 45, 171, 43, 44, 41]}

 89%|████████▉ | 889997/1000000 [8:39:40<47:11, 38.85it/s]global step 890000, trans_decision ep_re 228.5851442116554

{"global_step": 890000, "eval_re": [247.93436956125808, 41.80216026398721, 
41.34888474198627, 39.39580287545894, 532.8188568996873, 150.3786429509844, 
30.587019178364866, 217.0647570161972, 559.5190000385736, 425.0019485900563], 
"eval_len": [120, 44, 41, 45, 172, 90, 39, 118, 213, 191]}

 90%|████████▉ | 899997/1000000 [8:45:30<44:15, 37.66it/s]global step 900000, trans_decision ep_re 190.31703807910102

{"global_step": 900000, "eval_re": [61.3548622780111, 36.828686744213925, 
62.04041939055708, 64.12237564922978, 546.4995026865125, 38.0513387040033, 
12.423792801397484, 753.1666575727962, 279.66447730737036, 49.018267656918695], 
"eval_len": [63, 44, 62, 63, 192, 42, 25, 213, 128, 56]}

 91%|█████████ | 909998/1000000 [8:51:20<39:49, 37.66it/s]global step 910000, trans_decision ep_re 468.1112853209076

{"global_step": 910000, "eval_re": [104.43525871831525, 963.1125891278331, 
325.30360510015385, 1383.9951466752004, 645.7717748365836, 43.991347152881545, 
185.0719775980487, 42.226331171437394, 947.6373597865794, 39.56746304204243], 
"eval_len": [88, 269, 136, 358, 207, 47, 101, 43, 269, 45]}

 92%|█████████▏| 919998/1000000 [8:57:04<35:41, 37.35it/s]global step 920000, trans_decision ep_re 128.63001922823048

{"global_step": 920000, "eval_re": [39.95845393051138, 42.284868399156004, 
43.023008622356976, 65.05914446528591, 526.212137899448, 25.927824491664463, 
432.87731660411964, 36.27020559292094, 41.836266161183836, 32.850966115657585], 
"eval_len": [46, 43, 45, 62, 188, 40, 162, 42, 43, 41]}

 93%|█████████▎| 929999/1000000 [9:02:55<30:01, 38.85it/s]global step 930000, trans_decision ep_re 356.4575259914712

{"global_step": 930000, "eval_re": [38.14930117317144, 72.18943314118546, 
38.00110044536924, 938.517510720592, 1429.799522467434, 379.93155459188995, 
44.73919409735431, 40.69738879329015, 37.69002242738573, 544.8602320570391], 
"eval_len": [45, 65, 44, 277, 355, 149, 44, 43, 42, 188]}

 94%|█████████▍| 939996/1000000 [9:09:00<25:54, 38.60it/s]global step 940000, trans_decision ep_re 514.1185882448748

{"global_step": 940000, "eval_re": [1202.6986027149655, 339.80930894445777, 
670.7345198339777, 635.0723119273242, 1006.6348258507758, 575.650166095253, 
545.3166481087966, 36.398388284578196, 53.05270735195499, 75.81840333666425], 
"eval_len": [368, 147, 204, 210, 337, 199, 240, 42, 62, 66]}

 95%|█████████▍| 949997/1000000 [9:14:43<21:29, 38.77it/s]global step 950000, trans_decision ep_re 159.57956752447768

{"global_step": 950000, "eval_re": [172.3994175524122, 46.97012893064843, 
43.86777072051312, 29.426272473663218, 1000.6584625139672, 16.34900315011633, 
13.716215001870713, 182.28715122786062, 43.451288316425504, 46.669965357299354],
"eval_len": [88, 48, 46, 39, 285, 27, 24, 140, 43, 44]}

 96%|█████████▌| 959999/1000000 [9:20:35<17:09, 38.84it/s]global step 960000, trans_decision ep_re 210.7443297675598

{"global_step": 960000, "eval_re": [15.35667437205374, 42.26601968830786, 
38.60612523984321, 74.87564074971401, 38.542331088252226, 36.09879151302462, 
864.1101556504509, 36.627840298195515, 46.71862089499767, 914.2410981807584], 
"eval_len": [26, 47, 44, 67, 45, 42, 245, 44, 54, 257]}

 97%|█████████▋| 969997/1000000 [9:26:26<12:53, 38.76it/s]global step 970000, trans_decision ep_re 323.5455173283773

{"global_step": 970000, "eval_re": [95.36809330670168, 63.22792123783483, 
334.1958626079521, 826.018644255413, 413.0201896744953, 54.10258604125163, 
80.14862389563484, 227.1216615018559, 36.03018334784779, 1106.2214074147855], 
"eval_len": [77, 62, 151, 248, 160, 60, 66, 128, 42, 292]}

 98%|█████████▊| 979996/1000000 [9:32:30<08:40, 38.40it/s]global step 980000, trans_decision ep_re 347.46473734172685

{"global_step": 980000, "eval_re": [1036.833826538977, 67.96076337255356, 
43.32118522179028, 678.3243280502435, 44.787898231984045, 44.791165587121384, 
38.10617319949064, 699.6115669392431, 37.82698807384595, 783.0834782020187], 
"eval_len": [292, 64, 43, 210, 46, 47, 41, 236, 42, 236]}

 99%|█████████▉| 989999/1000000 [9:38:11<04:14, 39.24it/s]global step 990000, trans_decision ep_re 161.75280147255222

{"global_step": 990000, "eval_re": [34.85442539750581, 798.7573912347732, 
149.6949314751334, 44.91474967637797, 41.28036307793838, 44.30172560259097, 
17.098632347379183, 34.17591488189511, 30.743010270924728, 421.7068707610038], 
"eval_len": [41, 240, 98, 47, 49, 46, 27, 43, 42, 167]}

100%|█████████▉| 999996/1000000 [9:44:01<00:00, 38.67it/s]global step 1000000, trans_decision ep_re 613.6486147888049

{"global_step": 1000000, "eval_re": [1172.7498400011218, 49.40280054852517, 
936.9226275479795, 14.186336828297838, 967.5074864269804, 1110.0349831260821, 
36.55898896504764, 981.1175216000996, 118.81868873320295, 749.1868741107121], 
"eval_len": [317, 47, 272, 27, 303, 297, 43, 288, 90, 228]}

100%|██████████| 1000000/1000000 [9:44:15<00:00, 28.53it/s]
