
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 2,
    'delayspec': 'ExtremeClogL1U23::markov(ord(15,1), ord(3,5,3,shift=22), 
[[124, 1], [1, 19]])'
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9995/1000000 [02:14<5:13:35, 52.62it/s]global step 10000, trans_decision ep_re 216.5331838650872

{"global_step": 10000, "eval_re": [230.38149222313055, 274.8677216466597, 
229.3123660898093, 228.1476783509216, 54.36636350937101, 235.57182351306096, 
226.5565815631144, 228.2551577883696, 227.1951781474478, 230.6774758189874], 
"eval_len": [102, 149, 104, 102, 33, 109, 101, 102, 101, 104]}

  2%|▏         | 19995/1000000 [06:34<5:36:50, 48.49it/s]global step 20000, trans_decision ep_re 191.95406455822348

{"global_step": 20000, "eval_re": [209.73018299813293, 189.32793190834573, 
215.6318661445715, 126.65957856036009, 239.39019062806454, 216.10798705532707, 
200.7903523535451, 115.34464403137908, 200.14130781104305, 206.4166040914655], 
"eval_len": [99, 93, 98, 70, 111, 100, 93, 75, 107, 98]}

  3%|▎         | 29997/1000000 [10:55<5:04:28, 53.10it/s]global step 30000, trans_decision ep_re 293.6492030564148

{"global_step": 30000, "eval_re": [272.29207126540354, 450.6334562288261, 
155.14795118501536, 148.1833951810786, 284.4952404296168, 186.8303171264033, 
455.1897540325131, 288.85522262320205, 597.8368885295195, 97.0277339625701], 
"eval_len": [225, 394, 102, 104, 230, 142, 407, 235, 545, 60]}

  4%|▍         | 39998/1000000 [15:27<7:03:30, 37.78it/s]global step 40000, trans_decision ep_re 282.7068958653916

{"global_step": 40000, "eval_re": [296.28134429816305, 333.24163572213433, 
267.0336794677475, 332.1897280333417, 309.7074201487937, 322.3054996756174, 
241.8989277499129, 325.84665875052764, 85.94186171105909, 312.6222030966184], 
"eval_len": [127, 139, 120, 135, 137, 142, 102, 140, 56, 143]}

  5%|▍         | 49994/1000000 [19:50<4:58:54, 52.97it/s]global step 50000, trans_decision ep_re 158.52000903591176

{"global_step": 50000, "eval_re": [241.65796544589216, 254.98200075649459, 
14.85189083944673, 41.95103782205498, 43.588600407249416, 336.28895866324774, 
170.32881815457293, 61.17022981341421, 44.0646639350591, 376.3159245216854], 
"eval_len": [101, 107, 15, 47, 31, 133, 79, 52, 39, 177]}

  6%|▌         | 59994/1000000 [24:14<4:44:34, 55.05it/s]global step 60000, trans_decision ep_re 394.08637907589787

{"global_step": 60000, "eval_re": [433.65316108244787, 441.118766103847, 
364.460766290878, 425.39839766911024, 452.78557858336757, 450.1447143506628, 
397.00066445690305, 476.8394522062836, 46.327888389964635, 453.13440162551404], 
"eval_len": [167, 170, 146, 165, 178, 173, 160, 192, 33, 178]}

  7%|▋         | 69999/1000000 [28:42<9:47:37, 26.38it/s] global step 70000, trans_decision ep_re 320.6252628852495

{"global_step": 70000, "eval_re": [445.1615298729391, 19.457371557878286, 
447.2523468254321, 460.00691254589833, 147.93735290212362, 466.00051843185474, 
267.4758395850185, 471.51678132001933, 111.7040575525316, 369.7399182587997], 
"eval_len": [165, 18, 177, 170, 71, 171, 118, 165, 61, 181]}

  8%|▊         | 79997/1000000 [33:17<6:18:27, 40.52it/s]global step 80000, trans_decision ep_re 535.1112759654181

{"global_step": 80000, "eval_re": [410.1189131366131, 258.7814290206778, 
320.28269081774783, 34.601977011174746, 1358.916123878444, 541.6904173686602, 
167.17203583088116, 983.1983392744556, 1053.0594048911287, 223.29142842439904], 
"eval_len": [179, 112, 135, 31, 433, 170, 81, 320, 350, 98]}

  9%|▉         | 89996/1000000 [37:46<4:48:32, 52.56it/s]global step 90000, trans_decision ep_re 463.6850076449

{"global_step": 90000, "eval_re": [314.1706879701438, 462.81911663446954, 
90.33442133992962, 33.118046433689415, 1206.215530043575, 593.8341706359148, 
1108.7050324931045, 571.723784607332, 37.614509359731024, 218.314776931111], 
"eval_len": [146, 177, 60, 37, 419, 241, 376, 211, 44, 141]}

 10%|▉         | 99999/1000000 [42:10<6:19:17, 39.55it/s]global step 100000, trans_decision ep_re 288.3411148721268

{"global_step": 100000, "eval_re": [120.33656228990489, 323.81590483240245, 
298.58784130262273, 641.0154901235471, 281.04791384267423, 274.0459080092667, 
259.948026101531, 83.95298714908128, 393.83781837411277, 206.8226966961247], 
"eval_len": [68, 146, 140, 256, 136, 135, 132, 55, 243, 98]}

 11%|█         | 109999/1000000 [46:47<4:48:01, 51.50it/s]global step 110000, trans_decision ep_re 531.857395959464

{"global_step": 110000, "eval_re": [977.6971325752127, 165.8906587251436, 
670.3409369002991, 1004.9694663854208, 871.8831197094266, 898.2342094474042, 
234.33940570116252, 216.91548906678912, 33.23616558802272, 245.06737549575948], 
"eval_len": [324, 78, 216, 330, 311, 325, 105, 99, 42, 119]}

 12%|█▏        | 119999/1000000 [51:14<4:23:58, 55.56it/s]global step 120000, trans_decision ep_re 910.4209686572254

{"global_step": 120000, "eval_re": [1090.6046249449491, 602.9682849539375, 
238.48087060708457, 320.91077249709406, 1748.5598154924917, 597.3214485266486, 
1994.910418102385, 666.0165852792995, 309.5796648778623, 1534.8572012905008], 
"eval_len": [332, 222, 105, 136, 550, 209, 646, 252, 139, 470]}

 13%|█▎        | 129995/1000000 [55:47<5:53:55, 40.97it/s]global step 130000, trans_decision ep_re 723.4809827256511

{"global_step": 130000, "eval_re": [1130.6351164187877, 411.9590208965867, 
1290.0239441106924, 292.97617797651264, 1237.7678242461357, 119.37804316488558, 
1368.0931250035526, 202.40299200790938, 525.1450234827339, 656.4285599487146], 
"eval_len": [368, 147, 422, 128, 394, 65, 471, 109, 205, 248]}

 14%|█▍        | 139995/1000000 [1:00:22<4:17:50, 55.59it/s]global step 140000, trans_decision ep_re 654.6632255660364

{"global_step": 140000, "eval_re": [1074.7980487098923, 331.0786995416324, 
178.94970905530337, 836.2266951827338, 797.3059851459101, 456.9254321785537, 
1465.1737185455254, 1200.9659463178493, 132.66782876628898, 72.54019221667514], 
"eval_len": [397, 132, 83, 278, 258, 162, 469, 362, 67, 57]}

 15%|█▍        | 149998/1000000 [1:04:46<5:49:46, 40.50it/s]global step 150000, trans_decision ep_re 609.7020756508223

{"global_step": 150000, "eval_re": [478.3927344601543, 332.92283341187573, 
907.0538661911329, 957.107284002066, 560.130511200317, 682.4431059728547, 
24.340451442319264, 753.269198491381, 515.365777321963, 885.9949940141593], 
"eval_len": [171, 134, 271, 290, 192, 210, 24, 241, 178, 265]}

 16%|█▌        | 159999/1000000 [1:09:17<4:12:12, 55.51it/s]global step 160000, trans_decision ep_re 379.7564468209969

{"global_step": 160000, "eval_re": [68.06027023657218, 42.902328074989526, 
788.1624140191813, 151.26006499353784, 507.71598181475326, 149.1858703480863, 
404.1869724749888, 647.6891368745778, 394.6885744325583, 643.7128549407238], 
"eval_len": [45, 25, 293, 92, 184, 111, 152, 257, 150, 252]}

 17%|█▋        | 169997/1000000 [1:14:02<5:54:46, 38.99it/s]global step 170000, trans_decision ep_re 987.142604369172

{"global_step": 170000, "eval_re": [1614.7977188561995, 761.2018844984661, 
814.6966826306415, 35.289625543391864, 708.022494483057, 2361.1068901012172, 
2776.2448201435122, 475.9533787677733, 20.383201508201, 303.7293471592582], 
"eval_len": [517, 258, 289, 47, 248, 742, 864, 178, 24, 129]}

 18%|█▊        | 179999/1000000 [1:18:14<5:34:03, 40.91it/s]global step 180000, trans_decision ep_re 781.2735782092508

{"global_step": 180000, "eval_re": [1037.6636309702485, 3281.3221812536153, 
38.029821896567455, 316.6134248548247, 465.5319491219946, 182.96178640504962, 
125.16983082225902, 232.42112501042217, 775.8394575882968, 1357.1825741692298], 
"eval_len": [327, 1000, 36, 138, 169, 88, 66, 124, 257, 413]}

 19%|█▉        | 189995/1000000 [1:22:31<4:16:37, 52.61it/s]global step 190000, trans_decision ep_re 594.4239086950962

{"global_step": 190000, "eval_re": [830.8418905931113, 495.5841894235993, 
214.594962983922, 122.65419842910646, 1084.75421094223, 1265.3490410401173, 
34.03141802711867, 34.175858520762674, 895.2790986718911, 966.9742183191024], 
"eval_len": [273, 174, 106, 93, 321, 377, 30, 39, 272, 313]}

 20%|█▉        | 199995/1000000 [1:26:46<4:10:13, 53.29it/s]global step 200000, trans_decision ep_re 640.5541908016028

{"global_step": 200000, "eval_re": [828.0083538857183, 342.40020995606704, 
1118.2265254938366, 1065.8442651369176, 961.9811767914359, 1012.4127881263936, 
185.36791270648, 34.745184369498475, 828.3244038448402, 28.231087704838888], 
"eval_len": [286, 136, 327, 315, 292, 321, 85, 36, 291, 29]}

 21%|██        | 209999/1000000 [1:31:06<4:05:49, 53.56it/s]global step 210000, trans_decision ep_re 789.7710595645402

{"global_step": 210000, "eval_re": [1101.8403737214098, 300.0211376070533, 
853.4971916965802, 713.2381890429723, 362.88951946361567, 1507.2813384513556, 
1225.8514345823426, 1218.4481064562447, 364.6127254733717, 250.03057915045596], 
"eval_len": [360, 121, 298, 228, 142, 497, 435, 362, 144, 106]}

 22%|██▏       | 219995/1000000 [1:35:37<3:50:29, 56.40it/s]global step 220000, trans_decision ep_re 468.0628980366776

{"global_step": 220000, "eval_re": [331.3020449278633, 513.5173476366793, 
1638.047093686417, 69.57295241981501, 739.0608005278808, 306.880100658092, 
37.61395652427852, 46.290947204188306, 36.204792709759104, 962.1389440718027], 
"eval_len": [136, 195, 535, 46, 255, 141, 38, 46, 39, 321]}

 23%|██▎       | 229995/1000000 [1:39:56<5:17:02, 40.48it/s]global step 230000, trans_decision ep_re 446.708990058161

{"global_step": 230000, "eval_re": [1084.8783836284492, 675.8930418592871, 
1068.9582819857658, 889.9322471084473, 54.56186275317049, 141.1675471691541, 
164.8810134942008, 158.37125782560796, 54.47280481012892, 173.97345994739877], 
"eval_len": [331, 247, 341, 284, 34, 80, 82, 114, 39, 113]}

 24%|██▍       | 239994/1000000 [1:44:25<3:43:11, 56.75it/s]global step 240000, trans_decision ep_re 717.1911488289295

{"global_step": 240000, "eval_re": [283.44768875193586, 1190.8931450405476, 
1039.6218659818758, 133.23722993078508, 1320.6496454709134, 406.3618089949217, 
37.88321645644846, 235.45524385334113, 354.7451820747613, 2169.6164617337645], 
"eval_len": [115, 352, 336, 72, 440, 152, 39, 99, 141, 676]}

 25%|██▍       | 249999/1000000 [1:48:53<5:22:08, 38.80it/s]global step 250000, trans_decision ep_re 776.3759223855543

{"global_step": 250000, "eval_re": [128.88043897577094, 1706.8258810535142, 
47.41598276996712, 24.338549701267812, 34.011929101784794, 731.160785801917, 
634.4248478282412, 3076.4768720001225, 391.9798221447456, 988.2441144782116], 
"eval_len": [71, 558, 48, 28, 37, 265, 236, 1000, 150, 320]}

 26%|██▌       | 259995/1000000 [1:53:08<3:40:19, 55.98it/s]global step 260000, trans_decision ep_re 519.6165084815947

{"global_step": 260000, "eval_re": [105.84408272461398, 226.5286467737213, 
41.87665611901295, 1871.3396934719942, 974.4180404532697, 226.25420574940088, 
609.5948882364427, 239.90063664353335, 478.7373045235896, 421.6709301203678], 
"eval_len": [77, 95, 52, 632, 299, 95, 206, 100, 169, 179]}

 27%|██▋       | 269997/1000000 [1:57:18<3:35:37, 56.43it/s]global step 270000, trans_decision ep_re 899.4627255774333

{"global_step": 270000, "eval_re": [1661.445015468285, 793.295399109082, 
920.151072340933, 1251.486713767471, 26.88028009405894, 1034.3474199497687, 
1501.0343900324303, 326.48075648269565, 118.1910926584161, 1361.3151158711917], 
"eval_len": [534, 251, 305, 377, 35, 319, 476, 143, 64, 436]}

 28%|██▊       | 279994/1000000 [2:01:41<3:35:45, 55.62it/s]global step 280000, trans_decision ep_re 332.30552084459066

{"global_step": 280000, "eval_re": [76.5347110679047, 965.3014444718947, 
31.925512152028528, 33.74956022814443, 893.1635171531125, 58.077305564173535, 
35.37255530562847, 905.0442756498546, 303.51855755113314, 20.367769302032393], 
"eval_len": [68, 319, 38, 35, 300, 41, 36, 335, 143, 16]}

 29%|██▉       | 289999/1000000 [2:05:55<3:46:17, 52.29it/s]global step 290000, trans_decision ep_re 844.9993338043221

{"global_step": 290000, "eval_re": [238.68537922391016, 2163.3969282882576, 
887.9059734761362, 441.9864609181727, 618.0123037868768, 147.44397587536685, 
1591.3662126698878, 1104.0575860010256, 1220.4529950812077, 36.68552272237823], 
"eval_len": [103, 703, 293, 181, 237, 97, 526, 340, 399, 39]}

 30%|██▉       | 299997/1000000 [2:10:18<3:25:54, 56.66it/s]global step 300000, trans_decision ep_re 1082.0613703120293

{"global_step": 300000, "eval_re": [744.280786525207, 588.9511201771677, 
47.825206291608595, 1262.770769358177, 144.9712445773766, 2040.8926076979853, 
1772.2479933849158, 458.9405971047697, 2504.4804395598403, 1255.2529384432446], 
"eval_len": [264, 204, 61, 406, 71, 644, 545, 166, 777, 411]}

 31%|███       | 309999/1000000 [2:14:49<4:50:21, 39.61it/s]global step 310000, trans_decision ep_re 511.83395068878417

{"global_step": 310000, "eval_re": [531.2887579425939, 817.8372440157933, 
866.7800929110326, 38.2591782927261, 197.4104622100684, 542.1945195688836, 
168.67662663748982, 1349.2067037961924, 181.4801005202836, 425.2058209927776], 
"eval_len": [188, 293, 294, 37, 107, 191, 95, 429, 83, 158]}

 32%|███▏      | 319999/1000000 [2:19:13<3:22:34, 55.95it/s]global step 320000, trans_decision ep_re 1012.6814876615736

{"global_step": 320000, "eval_re": [1547.5389133482859, 2917.7817622501984, 
316.549998456212, 469.6755606413881, 152.05672624844766, 838.1915205620342, 
1895.2897979631036, 977.0120962048661, 31.374021233476917, 981.344479707723], 
"eval_len": [501, 924, 128, 167, 94, 331, 590, 319, 33, 346]}

 33%|███▎      | 329995/1000000 [2:22:58<3:20:50, 55.60it/s]global step 330000, trans_decision ep_re 630.6784077413305

{"global_step": 330000, "eval_re": [2.6817540927472754, 190.351353211322, 
719.4810575063865, 1018.0245888039528, 67.3616903282531, 449.42428487653837, 
179.9267694081276, 1518.6034837612763, 1376.2703131263913, 784.658782298309], 
"eval_len": [15, 93, 243, 331, 45, 163, 97, 503, 405, 259]}

 34%|███▍      | 339998/1000000 [2:27:14<3:27:05, 53.12it/s]global step 340000, trans_decision ep_re 637.5243073611163

{"global_step": 340000, "eval_re": [1032.6303082401478, 388.9435623943978, 
304.33826712889106, 437.033369147476, 425.4186923026366, 290.5040991939449, 
255.53829219135955, 312.0272063725311, 1505.5153940452124, 1423.293882594566], 
"eval_len": [342, 178, 122, 160, 179, 138, 127, 124, 490, 491]}

 35%|███▍      | 349995/1000000 [2:31:13<3:11:16, 56.64it/s]global step 350000, trans_decision ep_re 508.8247669146047

{"global_step": 350000, "eval_re": [723.0610146927054, 161.58697021829545, 
885.5323848519513, 183.86174996706484, 226.22947214387568, 748.2350633021247, 
237.70476569372704, 180.86974592248387, 1563.7985020936749, 177.36800026014404],
"eval_len": [243, 105, 311, 89, 95, 298, 145, 83, 484, 82]}

 36%|███▌      | 359995/1000000 [2:35:07<3:14:20, 54.89it/s]global step 360000, trans_decision ep_re 465.34807219774785

{"global_step": 360000, "eval_re": [68.0856024432389, 652.5848595980088, 
153.91303959432034, 506.4519220980828, 198.26154475671012, 480.5289734459948, 
848.2454668852029, 1109.3344278056009, 151.78112712206413, 484.2937582282549], 
"eval_len": [48, 218, 77, 179, 103, 195, 282, 331, 75, 154]}

 37%|███▋      | 369997/1000000 [2:38:53<3:05:03, 56.74it/s]global step 370000, trans_decision ep_re 748.4264270535874

{"global_step": 370000, "eval_re": [538.1464648476748, 558.6825343181015, 
1043.6401217503974, 2039.7284451572598, 1821.699772946142, 619.7365404350257, 
17.705222996791527, 22.099438728213524, 584.467505826851, 238.35822352941605], 
"eval_len": [189, 213, 353, 610, 575, 217, 16, 24, 208, 138]}

 38%|███▊      | 379999/1000000 [2:42:41<3:03:17, 56.38it/s]global step 380000, trans_decision ep_re 636.4170399315104

{"global_step": 380000, "eval_re": [968.9871671053714, 1555.6098177961455, 
1578.8996358167537, 233.0583980954823, 25.861473701822536, 458.0853838577617, 
850.9600991862915, 62.007301309018914, 128.20025604857102, 502.500866397886], 
"eval_len": [314, 492, 505, 97, 29, 177, 275, 38, 97, 178]}

 39%|███▉      | 389995/1000000 [2:46:30<2:59:59, 56.48it/s]global step 390000, trans_decision ep_re 819.4862176354607

{"global_step": 390000, "eval_re": [156.2513768829582, 1005.295485478287, 
376.2937523410301, 451.8765874312645, 1705.0052584682767, 1429.6481710638811, 
1658.8576640267786, 222.83388138193126, 709.1677550907739, 479.6322441894263], 
"eval_len": [88, 335, 183, 182, 538, 465, 526, 129, 243, 171]}

 40%|███▉      | 399997/1000000 [2:50:18<2:56:33, 56.64it/s]global step 400000, trans_decision ep_re 650.6036776157043

{"global_step": 400000, "eval_re": [686.8939119672099, 415.5791832436519, 
248.51716481199844, 775.5857118986625, 1491.5805934461393, 866.8586016814592, 
74.80049493437024, 654.1814981600193, 598.8837556375296, 693.1558603760034], 
"eval_len": [215, 152, 123, 310, 502, 296, 49, 211, 221, 230]}

 41%|████      | 409999/1000000 [2:54:05<2:55:37, 55.99it/s]global step 410000, trans_decision ep_re 832.3224396167479

{"global_step": 410000, "eval_re": [1988.3010328647022, 441.0611138339527, 
805.5557881119886, 492.6027396616795, 75.85920451159399, 886.1499625958593, 
25.231280993217368, 403.0067501824184, 1682.1699413215565, 1523.2865820905108], 
"eval_len": [606, 161, 282, 175, 58, 284, 28, 162, 514, 451]}

 42%|████▏     | 419995/1000000 [2:57:55<2:52:13, 56.13it/s]global step 420000, trans_decision ep_re 1113.5117845903383

{"global_step": 420000, "eval_re": [410.94277779594285, 836.7498658997278, 
2056.18516027084, 410.55365677188485, 314.3007142893319, 2776.371706877267, 
2128.461673321201, 470.3057243129763, 1062.495251733394, 668.7513146308187], 
"eval_len": [150, 275, 629, 150, 132, 854, 615, 171, 331, 229]}

 43%|████▎     | 429997/1000000 [3:01:45<2:47:31, 56.71it/s]global step 430000, trans_decision ep_re 471.27415873810224

{"global_step": 430000, "eval_re": [62.6397558690845, 910.356426972289, 
379.8613465539247, 596.3728438301714, 581.8858463351281, 29.429671785448104, 
42.99482053652226, 1034.1757321690357, 90.66658289217996, 984.358560437239], 
"eval_len": [61, 318, 142, 201, 250, 34, 39, 334, 73, 306]}

 44%|████▍     | 439999/1000000 [3:05:44<2:46:32, 56.04it/s]global step 440000, trans_decision ep_re 765.0463319353964

{"global_step": 440000, "eval_re": [109.122417325293, 80.26208923183938, 
441.30617179458807, 3255.550511148478, 707.0063767687624, 633.705531182123, 
468.18988210367354, 528.8985030827928, 1098.5878004325157, 327.8340362838983], 
"eval_len": [103, 51, 158, 1000, 238, 215, 168, 203, 357, 139]}

 45%|████▍     | 449995/1000000 [3:09:29<2:54:08, 52.64it/s]global step 450000, trans_decision ep_re 968.8030999874609

{"global_step": 450000, "eval_re": [1042.9126385037255, 1792.591076429365, 
2175.121174912777, 704.9637506722403, 832.7236652087654, 730.2637065330749, 
59.689321449481845, 1052.2170911342703, 547.2849249226575, 750.263650108251], 
"eval_len": [323, 527, 620, 223, 267, 244, 83, 354, 189, 226]}

 46%|████▌     | 459997/1000000 [3:13:34<2:49:32, 53.08it/s]global step 460000, trans_decision ep_re 696.0298107629842

{"global_step": 460000, "eval_re": [672.915125153353, 1514.8687627898607, 
905.672054638617, 166.47846596437773, 680.5875105635429, 291.77786377297264, 
23.419251504265798, 1075.9502411083042, 1200.2116241092588, 428.4172080252884], 
"eval_len": [221, 442, 275, 76, 262, 121, 26, 344, 323, 149]}

 47%|████▋     | 469997/1000000 [3:17:44<2:35:56, 56.65it/s]global step 470000, trans_decision ep_re 740.9980502840655

{"global_step": 470000, "eval_re": [475.4360498458049, 202.205522828574, 
225.661788629224, 1678.9492977305251, 1199.6066888611938, 966.5247786171733, 
56.64194794775695, 1442.7968105750945, 80.62303670929968, 1081.534581096009], 
"eval_len": [173, 89, 115, 498, 323, 284, 59, 393, 55, 301]}

 48%|████▊     | 479996/1000000 [3:22:08<3:27:43, 41.72it/s]global step 480000, trans_decision ep_re 579.9298338217657

{"global_step": 480000, "eval_re": [1197.9799006000676, 130.07198957728636, 
1813.1717867333173, 2.8210883371968314, 80.97835040117266, 1118.7583509655553, 
602.866494531023, 69.74310944642977, 44.008598476928455, 738.8986691486797], 
"eval_len": [320, 67, 500, 14, 53, 315, 209, 49, 57, 240]}

 49%|████▉     | 489996/1000000 [3:26:29<2:30:57, 56.31it/s]global step 490000, trans_decision ep_re 546.3119258189107

{"global_step": 490000, "eval_re": [96.7252553087221, 384.54060675682575, 
1210.8449641383932, 754.6464436178652, 947.1628308918224, 451.0411561736867, 
245.58317701284852, 29.684643569327662, 333.0141889058236, 1009.8759918137918], 
"eval_len": [70, 157, 338, 237, 341, 177, 113, 31, 123, 289]}

 50%|████▉     | 499996/1000000 [3:30:34<2:27:46, 56.39it/s]global step 500000, trans_decision ep_re 837.8002086123379

{"global_step": 500000, "eval_re": [244.19040996138546, 2389.248711125063, 
1210.895341088731, 196.37189338447033, 209.41602724662576, 231.5438773737173, 
186.09029417727453, 1704.3358667210032, 29.089179655629202, 1976.8204853894788],
"eval_len": [99, 663, 337, 93, 102, 118, 82, 497, 35, 539]}

 51%|█████     | 509998/1000000 [3:34:34<2:25:16, 56.22it/s]global step 510000, trans_decision ep_re 884.3915524456852

{"global_step": 510000, "eval_re": [1239.1178734011535, 1108.213164910618, 
1709.3539475864545, 75.35584564618826, 164.1951198909905, 459.9447314539323, 
1881.3664583177808, 710.5230730491475, 127.64957405808401, 1368.1957361425023], 
"eval_len": [341, 307, 476, 50, 79, 159, 547, 228, 73, 398]}

 52%|█████▏    | 519994/1000000 [3:38:24<2:21:22, 56.59it/s]global step 520000, trans_decision ep_re 876.3077176592393

{"global_step": 520000, "eval_re": [418.0534505567851, 237.3510510035628, 
2.988529622425488, 1461.8205993876452, 384.80447648177943, 421.0128490943325, 
414.13274456997124, 2532.154894657402, 1022.6623305719537, 1868.0962506465348], 
"eval_len": [159, 148, 14, 440, 145, 166, 158, 735, 301, 544]}

 53%|█████▎    | 529996/1000000 [3:42:02<2:18:04, 56.73it/s]global step 530000, trans_decision ep_re 615.650407673133

{"global_step": 530000, "eval_re": [163.32756798574007, 1030.748266813707, 
58.40364568870268, 561.0222530821807, 1029.3895877240043, 2.659635712624241, 
1055.896522341802, 135.88911698187997, 963.7056776049499, 1155.4618027957397], 
"eval_len": [98, 345, 49, 184, 281, 15, 312, 88, 273, 312]}

 54%|█████▍    | 539998/1000000 [3:45:48<2:17:25, 55.79it/s]global step 540000, trans_decision ep_re 737.6696012914384

{"global_step": 540000, "eval_re": [953.4026839017521, 31.914497094572436, 
856.226382713747, 536.3218331975793, 1357.526966702051, 449.078033570609, 
755.2404968228687, 209.32084138499903, 2171.076108768999, 56.58816875720687], 
"eval_len": [296, 36, 261, 221, 400, 153, 243, 100, 615, 41]}

 55%|█████▍    | 549994/1000000 [3:49:36<2:12:06, 56.77it/s]global step 550000, trans_decision ep_re 662.8165160534816

{"global_step": 550000, "eval_re": [1957.5209879923698, 358.9006427646161, 
184.87598668688693, 24.841675411569714, 497.6511334846538, 29.334033825473732, 
1222.3580770397, 783.8969438737166, 1077.7011615603915, 491.0845178954387], 
"eval_len": [569, 132, 95, 28, 182, 35, 354, 256, 293, 177]}

 56%|█████▌    | 559996/1000000 [3:53:28<2:09:01, 56.84it/s]global step 560000, trans_decision ep_re 214.04216409525975

{"global_step": 560000, "eval_re": [80.15273535853473, 358.51484897076807, 
61.2624555232624, 28.975456446053293, 323.42663995880383, 129.80580986455823, 
43.308095286463264, 30.65684769962452, 449.4335142196806, 634.8852376248486], 
"eval_len": [61, 158, 63, 21, 120, 66, 25, 38, 167, 202]}

 57%|█████▋    | 569998/1000000 [3:57:13<2:06:10, 56.80it/s]global step 570000, trans_decision ep_re 54.34273977629688

{"global_step": 570000, "eval_re": [27.049754996957034, 58.85656752019582, 
55.84101582438654, 55.5850546377404, 59.44951321970067, 31.78779427054719, 
62.02193677325374, 77.25891980899445, 57.72326515000229, 57.85357556119056], 
"eval_len": [32, 50, 42, 42, 44, 29, 35, 54, 43, 43]}

 58%|█████▊    | 579994/1000000 [4:00:55<2:04:05, 56.41it/s]global step 580000, trans_decision ep_re 214.01511621751314

{"global_step": 580000, "eval_re": [104.75953595386271, 222.9233585371097, 
313.67024832215327, 201.30030356801086, 100.50790706776067, 120.62450639673892, 
137.14016661453923, 322.40692520792027, 263.59945454071595, 353.21875596632], 
"eval_len": [66, 100, 117, 88, 88, 73, 73, 117, 104, 125]}

 59%|█████▉    | 589996/1000000 [4:04:40<2:00:09, 56.87it/s]global step 590000, trans_decision ep_re 324.28205549574403

{"global_step": 590000, "eval_re": [166.01973132636522, 366.49164896270605, 
465.5917165175298, 139.25634026651463, 50.16995013659619, 365.38524493956953, 
457.66322620686526, 393.40082583921793, 463.2354786453262, 375.60639211674953], 
"eval_len": [76, 128, 153, 67, 30, 145, 190, 139, 172, 130]}

 60%|█████▉    | 599998/1000000 [4:08:25<1:56:56, 57.01it/s]global step 600000, trans_decision ep_re 158.71641298856161

{"global_step": 600000, "eval_re": [197.14972554615755, 300.50847595415814, 
194.11571436674504, 42.8696265102641, 88.61834648693632, 66.41311081645482, 
172.29526377637427, 175.69511676055504, 185.13120360209896, 164.36754606587195],
"eval_len": [85, 114, 85, 34, 54, 47, 79, 80, 83, 77]}

 61%|██████    | 609994/1000000 [4:12:08<1:53:21, 57.34it/s]global step 610000, trans_decision ep_re 34.37186205486056

{"global_step": 610000, "eval_re": [31.432041723610133, 29.42187400040752, 
31.138344200992453, 59.53780145490599, 28.009734632133362, 35.1790759804428, 
29.270768931234564, 38.976457968716474, 29.68211778070789, 31.070403875454353], 
"eval_len": [26, 25, 26, 57, 24, 28, 25, 30, 25, 26]}

 62%|██████▏   | 619996/1000000 [4:15:51<1:51:59, 56.55it/s]global step 620000, trans_decision ep_re 357.62858659085936

{"global_step": 620000, "eval_re": [692.4111797953495, 166.5665217599676, 
600.4000836570394, 266.86587540244494, 637.2799881644323, 359.6551589242389, 
58.60441965357389, 220.39429966164766, 38.30824768144408, 535.800091208455], 
"eval_len": [205, 75, 183, 104, 190, 127, 36, 109, 43, 170]}

 63%|██████▎   | 629998/1000000 [4:19:36<1:48:43, 56.72it/s]global step 630000, trans_decision ep_re 152.49798811395723

{"global_step": 630000, "eval_re": [183.6522648256301, 178.55182068601988, 
155.89564209845452, 60.55178377338319, 198.02352027326327, 114.87878280061877, 
177.78048673223947, 161.5165354697309, 139.42535414996763, 154.70369033026472], 
"eval_len": [83, 81, 73, 37, 117, 62, 81, 75, 68, 75]}

 64%|██████▍   | 639999/1000000 [4:23:20<1:47:02, 56.05it/s]global step 640000, trans_decision ep_re 146.71386297121188

{"global_step": 640000, "eval_re": [147.30702591408593, 112.68570374926139, 
222.2652506710473, 170.46675301057053, 147.5163649708542, 145.43699924790687, 
145.789433675465, 132.821150124536, 166.88749134293033, 75.9624570054611], 
"eval_len": [80, 64, 98, 90, 81, 80, 81, 73, 99, 47]}

 65%|██████▍   | 649994/1000000 [4:27:06<1:42:52, 56.70it/s]global step 650000, trans_decision ep_re 170.67585349418547

{"global_step": 650000, "eval_re": [173.76357499251637, 173.60161101214348, 
166.80095368430338, 166.5933346570255, 194.2937861055031, 114.67167564286041, 
156.20777075607901, 187.3961138524051, 166.22026144261125, 207.20945279640736], 
"eval_len": [79, 79, 77, 77, 85, 62, 74, 83, 77, 88]}

 66%|██████▌   | 659996/1000000 [4:30:51<1:40:42, 56.27it/s]global step 660000, trans_decision ep_re 223.04866081125502

{"global_step": 660000, "eval_re": [247.40487577620712, 148.00268767561855, 
286.1340703618545, 244.1924882968194, 247.830144700418, 65.26111882542672, 
206.20261422947686, 300.44353575721215, 243.9202246750925, 241.0948478144243], 
"eval_len": [100, 77, 143, 99, 100, 40, 88, 166, 99, 98]}

 67%|██████▋   | 669998/1000000 [4:34:37<1:36:39, 56.90it/s]global step 670000, trans_decision ep_re 270.8599551338639

{"global_step": 670000, "eval_re": [259.9787493770001, 410.3747296937917, 
166.65081173651606, 281.8588511249604, 246.11700953213085, 223.85108934693216, 
611.6367614584296, 74.98174539557884, 147.33799643068886, 285.81180724261054], 
"eval_len": [106, 150, 77, 134, 101, 102, 216, 46, 84, 134]}

 68%|██████▊   | 679994/1000000 [4:38:22<1:34:07, 56.67it/s]global step 680000, trans_decision ep_re 168.70895200558002

{"global_step": 680000, "eval_re": [194.00652101745817, 186.3507621347605, 
58.626357052844845, 206.0747227515065, 195.53504436659588, 193.933737971069, 
170.08147794082905, 96.57164182908475, 189.70422576717783, 196.20502922447352], 
"eval_len": [86, 84, 36, 102, 87, 86, 79, 62, 85, 87]}

 69%|██████▉   | 689996/1000000 [4:42:05<1:31:45, 56.31it/s]global step 690000, trans_decision ep_re 298.40187264498417

{"global_step": 690000, "eval_re": [322.7267844613612, 380.1931976832748, 
332.44871386757956, 280.6750176577846, 318.29217622966496, 190.33239530554573, 
385.7040848547866, 376.2438319581731, 329.2957664406911, 68.1067579909802], 
"eval_len": [123, 140, 126, 111, 122, 84, 138, 138, 124, 39]}

 70%|██████▉   | 699998/1000000 [4:45:48<1:28:02, 56.79it/s]global step 700000, trans_decision ep_re 441.13988612732993

{"global_step": 700000, "eval_re": [196.265629064994, 667.6476857949083, 
49.86912412129375, 693.0067571089907, 255.89491617268087, 428.5696645702966, 
433.8243277041943, 688.7200340895643, 264.73568107607457, 732.8650415703028], 
"eval_len": [101, 204, 30, 234, 104, 174, 153, 228, 107, 235]}

 71%|███████   | 709994/1000000 [4:49:33<1:24:56, 56.90it/s]global step 710000, trans_decision ep_re 300.8528224779434

{"global_step": 710000, "eval_re": [215.33224324057505, 338.792580211325, 
329.7868818976174, 211.7159538478744, 65.27101430264118, 444.8862879009631, 
397.63349402833126, 450.4790707019301, 145.0271515755697, 409.6035470726067], 
"eval_len": [90, 120, 125, 89, 40, 162, 135, 146, 88, 157]}

 72%|███████▏  | 719996/1000000 [4:53:17<1:23:15, 56.05it/s]global step 720000, trans_decision ep_re 315.7572079776594

{"global_step": 720000, "eval_re": [62.11417360448957, 252.79478307471595, 
265.5828335808458, 550.0429683736623, 237.26451974907172, 549.1022740197342, 
310.021364042439, 376.6107164224975, 229.66853802383199, 324.36990888530624], 
"eval_len": [38, 105, 177, 176, 98, 185, 116, 156, 96, 121]}

 73%|███████▎  | 729998/1000000 [4:57:01<1:18:57, 57.00it/s]global step 730000, trans_decision ep_re 69.56644121173274

{"global_step": 730000, "eval_re": [56.656015451284645, 47.51416784412829, 
73.15650414617653, 99.31726924443679, 47.394402964173814, 75.21638379479418, 
70.55666351719456, 75.28248956205073, 75.18427274126988, 75.38624285181795], 
"eval_len": [85, 28, 45, 72, 28, 46, 43, 46, 46, 46]}

 74%|███████▍  | 739994/1000000 [5:00:44<1:18:06, 55.48it/s]global step 740000, trans_decision ep_re 120.59567943763368

{"global_step": 740000, "eval_re": [131.18089448521397, 86.01530810993691, 
133.40138609304262, 135.28207949936, 129.57246221009763, 132.76737870648634, 
64.05109570170485, 130.76135106202156, 131.15251642294822, 131.7723220855246], 
"eval_len": [67, 56, 68, 68, 67, 68, 40, 67, 67, 68]}

 75%|███████▍  | 749996/1000000 [5:04:27<1:14:41, 55.79it/s]global step 750000, trans_decision ep_re 140.93988495370337

{"global_step": 750000, "eval_re": [167.25162949137228, 151.0740861437168, 
115.3273455914143, 150.98780498547674, 84.51592586468189, 67.5022553140105, 
217.88566978473213, 135.0243797115692, 144.70877767413435, 175.1209749759255], 
"eval_len": [77, 72, 62, 72, 49, 41, 139, 70, 70, 79]}

 76%|███████▌  | 759998/1000000 [5:08:09<1:10:37, 56.64it/s]global step 760000, trans_decision ep_re 185.3201605638716

{"global_step": 760000, "eval_re": [140.62176623398975, 297.7755811702557, 
148.69303542693993, 147.10971588370552, 141.05809866364197, 137.07951427987868, 
142.28968527870794, 252.4436048264796, 294.2381123443721, 151.89249153074488], 
"eval_len": [69, 111, 71, 71, 69, 68, 71, 102, 109, 72]}

 77%|███████▋  | 769994/1000000 [5:11:52<1:07:47, 56.55it/s]global step 770000, trans_decision ep_re 238.54235655073632

{"global_step": 770000, "eval_re": [290.2605709921927, 435.35076578241296, 
276.28041820575794, 314.3682599131072, 293.1959049720872, 96.14744436004449, 
44.56509280031453, 90.48896547637138, 209.01376924053517, 335.75237376453975], 
"eval_len": [111, 154, 107, 116, 111, 60, 26, 63, 90, 123]}

 78%|███████▊  | 779996/1000000 [5:15:35<1:05:02, 56.37it/s]global step 780000, trans_decision ep_re 316.5143040525406

{"global_step": 780000, "eval_re": [321.9531049672953, 473.3170948526023, 
523.5082738841667, 262.4298159518557, 568.2985029279033, 78.09838393566648, 
470.4506836668727, 124.9722606948894, 100.39700636831034, 241.71791327584333], 
"eval_len": [117, 192, 165, 102, 172, 49, 157, 73, 62, 110]}

 79%|███████▉  | 789998/1000000 [5:19:20<1:02:55, 55.62it/s]global step 790000, trans_decision ep_re 313.81560101935804

{"global_step": 790000, "eval_re": [407.72184214847607, 176.8789201435933, 
397.8653711992838, 352.28310780120717, 332.27691809192527, 353.7574160284391, 
305.95227636319413, 397.76698114238957, 230.02194700857893, 183.63123026649325],
"eval_len": [149, 80, 146, 132, 126, 133, 119, 146, 111, 82]}

 80%|███████▉  | 799994/1000000 [5:23:04<1:00:07, 55.43it/s]global step 800000, trans_decision ep_re 316.29037655723545

{"global_step": 800000, "eval_re": [477.75596522352095, 544.28304117864, 
143.26989089189502, 215.11175321229652, 78.48972844784879, 318.85028953515297, 
436.90443496193626, 467.06812360209733, 216.28050890048226, 264.8900296184843], 
"eval_len": [160, 178, 71, 93, 44, 120, 152, 158, 93, 140]}

 81%|████████  | 809996/1000000 [5:26:49<55:33, 57.00it/s]global step 810000, trans_decision ep_re 300.5007399060444

{"global_step": 810000, "eval_re": [312.2545603026984, 166.43961211111488, 
354.73497134984564, 391.1326225969433, 320.78205233692324, 382.80834215342406, 
213.7007390396197, 226.61517909618507, 313.5442439766646, 322.9950760970247], 
"eval_len": [116, 78, 167, 140, 120, 137, 92, 101, 125, 120]}

 82%|████████▏ | 819998/1000000 [5:30:33<53:26, 56.14it/s]global step 820000, trans_decision ep_re 326.55918009108694

{"global_step": 820000, "eval_re": [352.8635327007116, 601.102483029621, 
78.3724249466011, 492.8230522938616, 672.5350784634071, 139.13032757688967, 
79.05366074486682, 187.10822489510076, 196.20260571996388, 466.4004105398464], 
"eval_len": [147, 215, 54, 188, 242, 71, 47, 96, 89, 176]}

 83%|████████▎ | 829994/1000000 [5:34:17<49:48, 56.88it/s]global step 830000, trans_decision ep_re 163.55498271597858

{"global_step": 830000, "eval_re": [180.79522604969026, 180.89589054116541, 
177.81519021206714, 184.78849999891509, 184.86249903606443, 182.1433094730409, 
184.39010954315776, 160.8980011382616, 34.21876718543236, 164.74233398199075], 
"eval_len": [81, 81, 80, 82, 82, 81, 82, 75, 40, 76]}

 84%|████████▍ | 839996/1000000 [5:38:00<47:13, 56.46it/s]global step 840000, trans_decision ep_re 138.50202462625515

{"global_step": 840000, "eval_re": [99.92580142300739, 106.69300677709715, 
154.1181003260145, 147.66735215784115, 146.85742805523333, 143.4170981168276, 
144.20302673560911, 143.87028373778338, 154.6558459178709, 143.61230301526686], 
"eval_len": [71, 81, 83, 71, 71, 70, 71, 70, 73, 70]}

 85%|████████▍ | 849998/1000000 [5:41:43<44:08, 56.65it/s]global step 850000, trans_decision ep_re 257.43323288753027

{"global_step": 850000, "eval_re": [273.6727728375578, 91.89721638886174, 
69.78899360527939, 897.6832973728567, 416.03064387087454, 88.51462553051407, 
293.27446109938535, 71.61568950266764, 300.3682846013866, 71.48634406591867], 
"eval_len": [109, 65, 43, 318, 149, 66, 111, 44, 112, 44]}

 86%|████████▌ | 859994/1000000 [5:45:26<41:43, 55.92it/s]global step 860000, trans_decision ep_re 154.12320590668327

{"global_step": 860000, "eval_re": [144.76049173166624, 138.48147408385515, 
138.06336941696634, 195.4441676824843, 156.91438676886074, 211.23463433998893, 
158.22915806264584, 106.16813776361565, 147.28658781696834, 144.64965139978113],
"eval_len": [71, 69, 69, 88, 75, 99, 75, 73, 76, 71]}

 87%|████████▋ | 869996/1000000 [5:49:10<38:32, 56.23it/s]global step 870000, trans_decision ep_re 150.1626558882055

{"global_step": 870000, "eval_re": [193.36090539630675, 189.0356700740761, 
192.80966687933494, 196.788268539553, 186.08005310970677, 164.21428770824878, 
27.898708466670033, 73.54065852247959, 221.4709581925177, 56.42738199316154], 
"eval_len": [84, 83, 84, 85, 82, 76, 31, 45, 99, 34]}

 88%|████████▊ | 879998/1000000 [5:52:53<35:25, 56.46it/s]global step 880000, trans_decision ep_re 366.5234768557023

{"global_step": 880000, "eval_re": [96.97532182354057, 675.466807370053, 
629.9159412014754, 343.5224208705807, 65.82603783033447, 396.1752131942204, 
618.79096423914, 77.20420622089557, 622.5431896662751, 138.8146661405081], 
"eval_len": [57, 221, 209, 132, 47, 172, 205, 74, 207, 73]}

 89%|████████▉ | 889994/1000000 [5:56:37<32:54, 55.73it/s]global step 890000, trans_decision ep_re 140.61716373069012

{"global_step": 890000, "eval_re": [117.87724463951486, 147.91918822318675, 
144.5647490900792, 142.24544732144076, 44.52424938788006, 202.23486223929635, 
153.89920446219188, 147.94607881175574, 147.83837414610875, 157.12223898544667],
"eval_len": [61, 73, 72, 71, 26, 126, 75, 73, 73, 76]}

 90%|████████▉ | 899996/1000000 [6:00:20<29:15, 56.98it/s]global step 900000, trans_decision ep_re 504.4537181019925

{"global_step": 900000, "eval_re": [373.4114729718689, 48.46902249680588, 
514.9072934683253, 226.36073378646628, 204.37188584670838, 195.8542592803269, 
475.3532704920769, 576.9935932228647, 1360.428964315465, 1068.386685139017], 
"eval_len": [153, 36, 196, 101, 108, 101, 183, 218, 501, 357]}

 91%|█████████ | 909998/1000000 [6:04:05<26:41, 56.21it/s]global step 910000, trans_decision ep_re 117.37184904212423

{"global_step": 910000, "eval_re": [177.23323213355164, 116.87204995216965, 
109.20284204467626, 92.47645293593591, 145.7920890014193, 116.71124856244155, 
148.3936323602803, 124.66224053795213, 112.30946594802312, 30.065236944792254], 
"eval_len": [83, 63, 60, 54, 76, 63, 76, 66, 61, 25]}

 92%|█████████▏| 919994/1000000 [6:07:46<23:19, 57.18it/s]global step 920000, trans_decision ep_re 40.563683011007164

{"global_step": 920000, "eval_re": [61.269730424664694, 38.611071865060815, 
36.47519417122741, 37.04335986436583, 43.789317214209426, 38.76627566382792, 
38.59711925371005, 35.11284285751999, 38.688985218497024, 37.28293357698843], 
"eval_len": [76, 30, 29, 29, 34, 30, 30, 28, 30, 29]}

 93%|█████████▎| 929996/1000000 [6:11:28<20:42, 56.34it/s]global step 930000, trans_decision ep_re 103.9093525652332

{"global_step": 930000, "eval_re": [55.60692465978515, 60.26913256512167, 
196.01667737505363, 89.14942119936619, 58.5612821674156, 97.69835436232071, 
194.14470753219035, 64.06493899458346, 118.7196470058169, 104.86243979067847], 
"eval_len": [35, 38, 97, 65, 37, 53, 115, 40, 61, 58]}

 94%|█████████▍| 939998/1000000 [6:15:11<17:31, 57.05it/s]global step 940000, trans_decision ep_re 335.940090556923

{"global_step": 940000, "eval_re": [92.84329732925761, 118.05631645988282, 
901.1601825960422, 53.25711619349708, 199.88273136045828, 612.6297615123618, 
68.58112841871872, 333.9584175746845, 549.3758367921564, 429.6561173321707], 
"eval_len": [76, 61, 362, 31, 85, 210, 51, 130, 215, 165]}

 95%|█████████▍| 949994/1000000 [6:18:56<14:58, 55.64it/s]global step 950000, trans_decision ep_re 176.19520140538538

{"global_step": 950000, "eval_re": [196.97940333872708, 10.76203669628081, 
247.91920297108857, 181.09178514925034, 189.1750400045804, 182.0668671427526, 
130.03729346505963, 245.70302635991786, 162.10307476358972, 216.11428416260648],
"eval_len": [108, 14, 144, 103, 104, 102, 70, 138, 83, 110]}

 96%|█████████▌| 959996/1000000 [6:22:39<11:47, 56.52it/s]global step 960000, trans_decision ep_re 264.08644039296246

{"global_step": 960000, "eval_re": [84.50139769837287, 368.7328999188854, 
357.2110052632208, 109.27151007023961, 324.14910858025246, 326.2814000471593, 
33.057643639213, 464.8459451293821, 369.46382553525603, 203.34966804764284], 
"eval_len": [47, 158, 194, 56, 168, 166, 31, 193, 194, 94]}

 97%|█████████▋| 969998/1000000 [6:26:24<08:44, 57.19it/s]global step 970000, trans_decision ep_re 76.46280228365006

{"global_step": 970000, "eval_re": [74.57848109114784, 79.14756885411798, 
76.02691768227481, 77.48242364791898, 81.54337146830038, 65.69207766445847, 
89.38731078675556, 70.7587399997535, 70.92865951651336, 79.08247212525981], 
"eval_len": [54, 45, 44, 44, 46, 38, 53, 41, 41, 45]}

 98%|█████████▊| 979998/1000000 [6:30:07<05:51, 56.92it/s]global step 980000, trans_decision ep_re 309.52690010168527

{"global_step": 980000, "eval_re": [251.52214919553913, 287.9890382015209, 
263.46107930984874, 243.88515576721196, 278.96405770294547, 299.4611836010173, 
297.09189506888265, 261.5859104952115, 301.00552657428574, 610.3030051003892], 
"eval_len": [113, 132, 116, 115, 121, 127, 124, 117, 127, 202]}

 99%|█████████▉| 989998/1000000 [6:33:51<02:56, 56.75it/s]global step 990000, trans_decision ep_re 387.88161600379783

{"global_step": 990000, "eval_re": [207.5332803151242, 66.01850786127119, 
592.0677801641119, 553.1279748027852, 589.1228449641928, 668.7682980912263, 
584.9737541788999, 2.743911700344656, 576.931959621804, 37.52784833821819], 
"eval_len": [86, 60, 184, 177, 181, 200, 179, 14, 179, 31]}

100%|█████████▉| 999994/1000000 [6:37:38<00:00, 56.53it/s]global step 1000000, trans_decision ep_re 222.44113120112314

{"global_step": 1000000, "eval_re": [200.58239015548187, 289.0501857899806, 
228.91015910356452, 74.95568753018145, 253.20101726454712, 339.1988553164735, 
328.13389112940666, 192.70603886210816, 27.510830762074256, 290.16225609741355],
"eval_len": [87, 113, 95, 52, 103, 131, 125, 82, 36, 124]}

100%|██████████| 1000000/1000000 [6:37:45<00:00, 41.90it/s]
