
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.1
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9997/1000000 [03:21<8:28:42, 32.44it/s]global step 10000, trans_decision ep_re 83.9047896006993

{"global_step": 10000, "eval_re": [59.526947093301224, 93.30072880468327, 
85.06022841521484, 96.3764946573728, 111.80474791588314, 56.913356698598655, 
80.8501770855297, 105.61236458595738, 91.13572976450585, 58.46712098594608], 
"eval_len": [39, 65, 63, 66, 69, 34, 63, 68, 68, 35]}

  2%|▏         | 19997/1000000 [09:57<8:16:56, 32.87it/s]global step 20000, trans_decision ep_re 134.04515351830452

{"global_step": 20000, "eval_re": [91.60043308516862, 158.71430771160897, 
121.83511427869512, 39.31009138762633, 237.50520186818338, 113.1688492509875, 
134.29460844613794, 62.59120159094426, 327.45980309006956, 53.97192447362368], 
"eval_len": [57, 92, 82, 39, 109, 84, 84, 54, 137, 48]}

  3%|▎         | 29999/1000000 [16:33<8:22:17, 32.19it/s]global step 30000, trans_decision ep_re 97.03971801430052

{"global_step": 30000, "eval_re": [38.38727162816338, 197.35560193276814, 
51.022761108954896, 124.45075203796708, 72.07570233071708, 104.51275338202758, 
97.21511458226901, 95.21575723174725, 125.3679106836129, 64.79355522477807], 
"eval_len": [30, 123, 37, 84, 56, 63, 68, 63, 85, 50]}

  4%|▍         | 39998/1000000 [23:11<8:07:24, 32.83it/s]global step 40000, trans_decision ep_re 125.83330095664556

{"global_step": 40000, "eval_re": [117.71854422935778, 115.32065997955209, 
108.57200021390744, 107.7495389152269, 80.86275167537299, 217.87296932778986, 
131.4495465417755, 107.98090926284117, 140.3548436356332, 130.45124578499866], 
"eval_len": [74, 80, 82, 71, 55, 126, 92, 68, 88, 81]}

  5%|▍         | 49997/1000000 [30:00<8:09:51, 32.32it/s]global step 50000, trans_decision ep_re 98.46133651623434

{"global_step": 50000, "eval_re": [79.357544560065, 65.51522714154855, 
89.82524478191324, 27.067905909033886, 90.69366802313016, 59.869945261350644, 
262.2494215624347, 85.29599700754413, 51.368655076475825, 173.36975583884714], 
"eval_len": [51, 61, 54, 29, 65, 50, 135, 58, 39, 109]}

  6%|▌         | 59999/1000000 [36:26<8:03:44, 32.39it/s]global step 60000, trans_decision ep_re 117.73598065733654

{"global_step": 60000, "eval_re": [168.41031584349832, 122.91546470492428, 
93.20089177429942, 105.73323516411635, 102.98004371572772, 120.15523972524701, 
67.90767575140366, 126.9494313301073, 133.20063130651334, 135.906877257528], 
"eval_len": [116, 81, 60, 69, 73, 77, 50, 88, 86, 85]}

  7%|▋         | 69998/1000000 [43:04<7:59:07, 32.35it/s]global step 70000, trans_decision ep_re 164.00198343362325

{"global_step": 70000, "eval_re": [297.6944307555206, 91.80978930679709, 
154.44608201265368, 119.77562052237705, 168.3262567389638, 139.655790948789, 
63.3533585366482, 285.3185558153433, 146.3308431254557, 173.3091065736839], 
"eval_len": [153, 59, 89, 72, 105, 86, 47, 144, 115, 101]}

  8%|▊         | 79997/1000000 [49:42<7:47:38, 32.79it/s]global step 80000, trans_decision ep_re 121.81610050057502

{"global_step": 80000, "eval_re": [205.6400577331271, 228.74001616827886, 
123.27090270590848, 81.37789511570223, 104.18806492780556, 73.14886033323465, 
79.373892121883, 135.44677419641314, 102.5026325246616, 84.47190917873561], 
"eval_len": [138, 143, 78, 52, 68, 53, 58, 86, 68, 62]}

  9%|▉         | 89996/1000000 [56:20<7:47:06, 32.47it/s]global step 90000, trans_decision ep_re 116.27147570889251

{"global_step": 90000, "eval_re": [32.389104309944486, 34.21305968524284, 
239.55940679936805, 268.46093576851183, 328.36737539138875, 35.43519928832449, 
30.15416454220834, 42.694809216770714, 32.98232165140792, 118.45838043575756], 
"eval_len": [39, 38, 112, 148, 151, 37, 39, 41, 36, 72]}

 10%|▉         | 99999/1000000 [1:03:10<7:41:43, 32.49it/s]global step 100000, trans_decision ep_re 162.8565194897151

{"global_step": 100000, "eval_re": [226.28725033937388, 83.81517241323297, 
77.97308403322322, 99.35428358574579, 170.1844866320374, 127.76555628489626, 
217.7729625684338, 305.21548610867893, 126.49275056394252, 193.70416236758638], 
"eval_len": [115, 63, 59, 74, 100, 73, 122, 167, 80, 108]}

 11%|█         | 109997/1000000 [1:09:36<7:37:46, 32.40it/s]global step 110000, trans_decision ep_re 263.83524535302087

{"global_step": 110000, "eval_re": [111.21286400836034, 157.6809216123622, 
171.20505774874792, 260.63440718294146, 248.38202342742088, 411.7571159652244, 
421.7757321147736, 259.42544471128764, 171.10805033258183, 425.17083642650834], 
"eval_len": [68, 98, 95, 124, 119, 164, 179, 119, 107, 174]}

 12%|█▏        | 119999/1000000 [1:16:16<7:32:27, 32.42it/s]global step 120000, trans_decision ep_re 192.55030090196266

{"global_step": 120000, "eval_re": [352.358051450879, 96.11860832447351, 
203.41684105223624, 107.95454339460471, 104.10368071234988, 120.5005451555402, 
444.1288969837705, 14.562310421650873, 288.32512579650023, 194.0344057276215], 
"eval_len": [167, 69, 113, 67, 66, 68, 188, 19, 144, 100]}

 13%|█▎        | 129998/1000000 [1:22:55<7:23:15, 32.71it/s]global step 130000, trans_decision ep_re 153.34920790452023

{"global_step": 130000, "eval_re": [164.75845637677625, 173.0749181004202, 
90.10115885143557, 134.36120495469308, 226.64204984091702, 79.4993830766512, 
18.190607219165987, 349.1558839565736, 86.8772842972808, 210.83113237128842], 
"eval_len": [91, 101, 63, 78, 111, 58, 20, 166, 62, 112]}

 14%|█▍        | 139997/1000000 [1:29:34<7:21:02, 32.50it/s]global step 140000, trans_decision ep_re 141.33921197737286

{"global_step": 140000, "eval_re": [107.6691370434583, 321.2275791271483, 
103.13913502337779, 113.16455703386612, 123.73236062238271, 64.18198393249092, 
127.36149769241395, 191.20835586471426, 152.59540973709016, 109.11210369678578],
"eval_len": [73, 151, 71, 80, 69, 43, 77, 97, 89, 71]}

 15%|█▍        | 149996/1000000 [1:36:11<7:13:10, 32.70it/s]global step 150000, trans_decision ep_re 184.16586254773375

{"global_step": 150000, "eval_re": [333.5786204045632, 73.94593330986427, 
83.2132728534263, 218.8780259538247, 75.17733237596516, 199.509497984463, 
183.39119038768342, 64.54187888235828, 198.90908302027864, 410.5137903049105], 
"eval_len": [150, 46, 58, 112, 53, 106, 97, 47, 101, 166]}

 16%|█▌        | 159998/1000000 [1:42:50<7:13:27, 32.30it/s]global step 160000, trans_decision ep_re 159.11712126376088

{"global_step": 160000, "eval_re": [204.64940730560573, 254.72540389313818, 
15.007747950592593, 151.38288724287204, 112.79842571182563, 197.79699563209735, 
190.0049268032609, 202.64932483192337, 15.77608440270047, 246.3800088635924], 
"eval_len": [105, 116, 18, 85, 91, 103, 119, 107, 19, 140]}

 17%|█▋        | 169997/1000000 [1:49:40<7:06:42, 32.42it/s]global step 170000, trans_decision ep_re 188.24100410256136

{"global_step": 170000, "eval_re": [191.95071045155947, 286.7159322103759, 
111.34878311635353, 68.92954503990791, 162.48977729613333, 131.04961046509297, 
275.20716853605586, 191.27330862808873, 352.4863404629745, 110.95886481907124], 
"eval_len": [94, 139, 68, 47, 100, 80, 138, 98, 161, 69]}

 18%|█▊        | 179997/1000000 [1:56:20<7:01:06, 32.45it/s]global step 180000, trans_decision ep_re 207.0478107042158

{"global_step": 180000, "eval_re": [225.0248964996335, 611.1042103679183, 
166.99659710234621, 218.73578812360805, 96.62252652752021, 15.606036698968072, 
393.9392661281813, 128.13654808038714, 20.558034745725394, 193.75420276787025], 
"eval_len": [113, 244, 87, 107, 84, 19, 164, 81, 26, 102]}

 19%|█▉        | 189997/1000000 [2:03:00<6:59:23, 32.19it/s]global step 190000, trans_decision ep_re 267.83604522738415

{"global_step": 190000, "eval_re": [479.67931255710744, 103.13570059513508, 
198.08120904978784, 30.2182273845575, 561.2447484549593, 190.05007068376506, 
720.7256576183103, 63.721687057207276, 303.44515881566673, 28.05868005734526], 
"eval_len": [198, 64, 101, 40, 215, 93, 294, 43, 142, 31]}

 20%|█▉        | 199997/1000000 [2:09:40<6:49:30, 32.56it/s]global step 200000, trans_decision ep_re 231.6701246616155

{"global_step": 200000, "eval_re": [268.70315654677665, 141.8796273682225, 
359.39637481250026, 177.71283843046723, 120.61168592619788, 323.68537587482257, 
162.0222628781072, 230.45005938022942, 97.10868830526366, 435.13117709356766], 
"eval_len": [126, 86, 159, 124, 87, 143, 97, 117, 69, 185]}

 21%|██        | 209997/1000000 [2:16:06<6:50:51, 32.05it/s]global step 210000, trans_decision ep_re 140.7656934688714

{"global_step": 210000, "eval_re": [122.76382879194489, 142.0642235170203, 
129.6993134873282, 68.62133163707247, 17.373132414917006, 106.3978596732098, 
185.52826714912888, 108.46603419406256, 130.5219887635123, 396.22095506051744], 
"eval_len": [88, 74, 81, 48, 18, 73, 101, 68, 78, 182]}

 22%|██▏       | 219999/1000000 [2:22:45<6:41:54, 32.35it/s]global step 220000, trans_decision ep_re 150.32872831730188

{"global_step": 220000, "eval_re": [69.55929807963405, 107.12812355341327, 
203.0964447803621, 137.65893526556508, 280.87673426218186, 103.23934642905691, 
91.63917399984544, 275.02752782707825, 107.13163949242099, 127.93005948346058], 
"eval_len": [50, 79, 108, 82, 130, 66, 63, 133, 65, 91]}

 23%|██▎       | 229997/1000000 [2:29:23<6:38:45, 32.18it/s]global step 230000, trans_decision ep_re 299.07465962759704

{"global_step": 230000, "eval_re": [538.3749966112605, 171.16987840015474, 
280.85736705693694, 675.3027178539359, 32.297751907842525, 13.74829078998935, 
332.5917118100489, 121.94367613506806, 665.7169725737742, 158.74323313695962], 
"eval_len": [206, 89, 123, 246, 36, 18, 143, 80, 239, 90]}

 24%|██▍       | 239999/1000000 [2:36:04<6:27:16, 32.71it/s]global step 240000, trans_decision ep_re 189.70724190338484

{"global_step": 240000, "eval_re": [148.8439274846874, 186.6652097802746, 
16.466593679979503, 100.06236409888255, 373.72848877715415, 313.39166640928937, 
25.642037898503403, 120.1503976099993, 593.8800138089081, 18.24171948617027], 
"eval_len": [83, 90, 21, 70, 176, 154, 27, 78, 225, 19]}

 25%|██▍       | 249997/1000000 [2:42:43<6:28:42, 32.16it/s]global step 250000, trans_decision ep_re 56.43360861493734

{"global_step": 250000, "eval_re": [35.073247104240785, 27.840895326237057, 
22.353267113771707, 26.408963093167014, 29.20367826605522, 222.67417220563547, 
107.57018896795178, 31.461103514986707, 29.173708517132184, 32.57686204019552], 
"eval_len": [31, 36, 27, 32, 30, 116, 86, 34, 35, 35]}

 26%|██▌       | 259997/1000000 [2:49:24<6:19:28, 32.50it/s]global step 260000, trans_decision ep_re 80.932343145237

{"global_step": 260000, "eval_re": [86.65876861618825, 80.79291234062023, 
91.04279233396966, 228.6589918808999, 92.90598772936818, 15.87827802971294, 
83.0343536985346, 33.96889669114406, 12.999555759439755, 83.38289437249226], 
"eval_len": [63, 60, 74, 108, 72, 21, 66, 38, 16, 63]}

 27%|██▋       | 269997/1000000 [2:56:02<6:17:01, 32.27it/s]global step 270000, trans_decision ep_re 181.3892073770639

{"global_step": 270000, "eval_re": [79.13207148495796, 139.5711771196601, 
167.70646826249373, 149.35327349718915, 274.9411803488385, 234.92899335850439, 
99.6975865058469, 553.0176116245951, 23.694117526866982, 91.84959404168644], 
"eval_len": [57, 90, 101, 73, 124, 114, 65, 213, 29, 81]}

 28%|██▊       | 279996/1000000 [3:02:40<6:13:40, 32.11it/s]global step 280000, trans_decision ep_re 152.17410115146313

{"global_step": 280000, "eval_re": [116.94263094426803, 25.536576514954195, 
58.4982311127645, 193.51191916874973, 416.4040059221925, 182.01535188325107, 
24.914502090140477, 165.63634031325637, 111.32676327209488, 226.95469029295967],
"eval_len": [82, 28, 42, 92, 165, 95, 29, 92, 76, 111]}

 29%|██▉       | 289997/1000000 [3:09:19<6:03:44, 32.53it/s]global step 290000, trans_decision ep_re 73.5147174900375

{"global_step": 290000, "eval_re": [31.95522241973554, 425.5485033687712, 
19.09168823390331, 36.50026135091742, 26.489151506246312, 29.025359581864887, 
65.7354194997369, 39.118776365716485, 34.907172578166055, 26.77561999531678], 
"eval_len": [39, 169, 27, 39, 32, 32, 55, 35, 37, 35]}

 30%|██▉       | 299997/1000000 [3:15:56<5:56:55, 32.69it/s]global step 300000, trans_decision ep_re 128.87057438258836

{"global_step": 300000, "eval_re": [116.78189005167715, 126.06413716453736, 
75.12237833564622, 250.61328984233043, 322.30266749814194, 29.363730799617493, 
35.38333035226735, 125.69811189414209, 32.15142667183638, 175.22478121568707], 
"eval_len": [77, 76, 63, 111, 144, 32, 38, 71, 37, 87]}

 31%|███       | 309996/1000000 [3:22:34<5:56:44, 32.24it/s]global step 310000, trans_decision ep_re 133.2270266805657

{"global_step": 310000, "eval_re": [134.1745982746404, 25.92977251449843, 
343.3924685985761, 12.778613901982908, 15.675001650898459, 72.6540813358331, 
211.90559989179712, 30.347964472770542, 451.47646885156644, 33.935697313093414],
"eval_len": [85, 27, 140, 15, 18, 64, 98, 34, 179, 30]}

 32%|███▏      | 319997/1000000 [3:29:16<5:56:27, 31.79it/s]global step 320000, trans_decision ep_re 96.2530338995156

{"global_step": 320000, "eval_re": [25.762510617912863, 211.50153387330087, 
61.208200660731386, 243.2713533277447, 71.40169626814463, 172.11880383386324, 
30.339800192545628, 79.84747780152674, 33.29057751835436, 33.78838490103158], 
"eval_len": [30, 102, 57, 115, 67, 92, 33, 61, 36, 36]}

 33%|███▎      | 329999/1000000 [3:36:10<5:46:08, 32.26it/s]global step 330000, trans_decision ep_re 182.4991272471559

{"global_step": 330000, "eval_re": [22.95150344193523, 19.784353237967935, 
97.90727414581411, 907.3714968086252, 15.96403905663639, 28.714484085105195, 
20.98644540505711, 667.7162976723808, 28.558299631547378, 15.037078986489535], 
"eval_len": [24, 29, 79, 298, 19, 29, 23, 228, 38, 19]}

 34%|███▍      | 339997/1000000 [3:42:50<5:40:09, 32.34it/s]global step 340000, trans_decision ep_re 275.78322618483145

{"global_step": 340000, "eval_re": [100.74198826817904, 80.29055260826796, 
594.8944391439223, 710.1314047478114, 177.03161270993579, 220.65799893597176, 
11.670068444192943, 670.1356156360033, 95.02589327438518, 97.25268807964457], 
"eval_len": [73, 71, 203, 215, 95, 105, 17, 237, 74, 80]}

 35%|███▍      | 349997/1000000 [3:49:16<5:32:00, 32.63it/s]global step 350000, trans_decision ep_re 68.62162073483323

{"global_step": 350000, "eval_re": [16.955306847787046, 153.8230319894483, 
32.995024935921535, 173.63459648436063, 19.207244817584492, 31.415690685293928, 
51.27055939772548, 26.839039396202093, 27.83831630133828, 152.23739649267054], 
"eval_len": [22, 80, 36, 97, 21, 33, 49, 33, 32, 74]}

 36%|███▌      | 359997/1000000 [3:55:52<5:29:42, 32.35it/s]global step 360000, trans_decision ep_re 146.7000621508941

{"global_step": 360000, "eval_re": [540.9410789593464, 29.762970154863027, 
267.38621076606574, 18.605001856194736, 177.70417744201188, 27.408670073463888, 
147.80692060798123, 12.738417651172707, 223.92995936642328, 20.717214631418024],
"eval_len": [189, 32, 132, 22, 97, 37, 101, 23, 104, 22]}

 37%|███▋      | 369999/1000000 [4:02:31<5:25:34, 32.25it/s]global step 370000, trans_decision ep_re 109.24759019818487

{"global_step": 370000, "eval_re": [133.685353325221, 167.47641425758545, 
218.84197292440413, 182.551684370671, 203.49817816454316, 13.969520178684135, 
72.77204464148546, 37.7967651719065, 22.400738090366577, 39.48323085698114], 
"eval_len": [86, 94, 104, 95, 99, 21, 69, 37, 24, 42]}

 38%|███▊      | 379999/1000000 [4:09:05<5:18:00, 32.49it/s]global step 380000, trans_decision ep_re 33.80795818838179

{"global_step": 380000, "eval_re": [15.719781428920438, 30.710280331830653, 
32.75221204730949, 46.53766442265887, 14.79510265360214, 76.80610276134689, 
29.820648031971643, 35.43529038112523, 21.461389907848265, 34.04110991720426], 
"eval_len": [21, 35, 40, 48, 18, 61, 38, 34, 27, 41]}

 39%|███▉      | 389999/1000000 [4:15:45<5:16:23, 32.13it/s]global step 390000, trans_decision ep_re 107.86606303845645

{"global_step": 390000, "eval_re": [63.779792419579216, 88.11196807850666, 
20.398352155822852, 18.462653458072595, 181.54420916139472, 57.10706889929752, 
29.456007993486715, 176.66382555062583, 19.912070225360974, 423.22468244241753],
"eval_len": [59, 67, 21, 22, 102, 58, 32, 96, 25, 179]}

 40%|███▉      | 399998/1000000 [4:22:24<5:10:35, 32.20it/s]global step 400000, trans_decision ep_re 261.6860959728109

{"global_step": 400000, "eval_re": [125.93989560771644, 24.77341692476517, 
353.09410220363276, 21.108368573152987, 320.49842026265634, 147.52801966642414, 
247.7507565518625, 77.90908030791046, 978.1412150227768, 320.11768460721106], 
"eval_len": [83, 30, 148, 21, 126, 86, 119, 70, 304, 138]}

 41%|████      | 409997/1000000 [4:29:02<5:01:39, 32.60it/s]global step 410000, trans_decision ep_re 67.27179338781215

{"global_step": 410000, "eval_re": [41.00572113936977, 193.33063800571838, 
290.7017857105331, 13.349337152937569, 21.1686746558848, 23.417676509863405, 
20.65158718213904, 20.836553445141842, 25.58347295219777, 22.672487124335927], 
"eval_len": [35, 104, 130, 17, 23, 29, 23, 31, 32, 30]}

 42%|████▏     | 419997/1000000 [4:35:38<4:52:54, 33.00it/s]global step 420000, trans_decision ep_re 33.31665978854381

{"global_step": 420000, "eval_re": [14.841779861268016, 26.720230910052436, 
23.948834058780687, 41.679605986028236, 23.559526512321007, 54.57248639082852, 
40.607276644648586, 76.4502539240394, 16.692977085772274, 14.09362651169894], 
"eval_len": [18, 35, 26, 41, 27, 56, 39, 55, 22, 20]}

 43%|████▎     | 429997/1000000 [4:42:13<4:54:18, 32.28it/s]global step 430000, trans_decision ep_re 89.32185226772893

{"global_step": 430000, "eval_re": [31.190612856664657, 13.964561730624368, 
71.20736037992927, 24.080736139069693, 60.974355465242965, 18.761573845973764, 
26.117641950298307, 31.35607118135175, 19.832355166782666, 595.7332539613519], 
"eval_len": [31, 22, 59, 28, 55, 27, 29, 31, 23, 204]}

 44%|████▍     | 439997/1000000 [4:48:49<4:45:26, 32.70it/s]global step 440000, trans_decision ep_re 79.17772961653121

{"global_step": 440000, "eval_re": [38.97628090128057, 489.1717501540152, 
21.726527247538193, 24.629091569697852, 24.462749921548607, 30.656597011558226, 
17.963024427693593, 65.78903085395093, 14.060308787039801, 64.3419352909891], 
"eval_len": [43, 168, 22, 31, 23, 36, 24, 53, 21, 56]}

 45%|████▍     | 449997/1000000 [4:55:23<4:38:55, 32.87it/s]global step 450000, trans_decision ep_re 180.99510109508162

{"global_step": 450000, "eval_re": [59.09755720541223, 132.52060551227805, 
340.6996783886717, 64.8646970628059, 60.695551049816174, 59.771400944487965, 
334.1741033427439, 69.18538225586153, 600.1862565828528, 88.755778605886], 
"eval_len": [54, 73, 144, 60, 51, 63, 146, 58, 222, 66]}

 46%|████▌     | 459996/1000000 [5:01:57<4:33:52, 32.86it/s]global step 460000, trans_decision ep_re 142.60546983210023

{"global_step": 460000, "eval_re": [18.072567083017795, 260.1625488816114, 
202.0177536398446, 34.5001536713545, 36.240231836319325, 35.17204079547897, 
599.4112767616541, 33.83876321371284, 167.13457641597927, 39.50478602202943], 
"eval_len": [30, 124, 94, 34, 36, 35, 204, 38, 85, 35]}

 47%|████▋     | 469999/1000000 [5:08:30<4:29:15, 32.81it/s]global step 470000, trans_decision ep_re 134.93785845499798

{"global_step": 470000, "eval_re": [20.27439258023983, 20.528171431624408, 
288.5539535378361, 15.826140820826687, 21.651589401240713, 528.4595267261732, 
123.98603548722846, 58.62371794094449, 192.01115223283773, 79.46390439102805], 
"eval_len": [24, 23, 129, 21, 36, 202, 71, 50, 88, 67]}

 48%|████▊     | 479998/1000000 [5:15:03<4:22:37, 33.00it/s]global step 480000, trans_decision ep_re 141.70952702457026

{"global_step": 480000, "eval_re": [198.41688829678804, 17.58865254456412, 
19.081963993089825, 438.17433853050704, 178.39868557723292, 16.443143515850874, 
17.439770113891846, 368.1645951103792, 88.05306410368294, 75.33416845971549], 
"eval_len": [100, 23, 24, 183, 100, 22, 21, 138, 54, 50]}

 49%|████▉     | 489997/1000000 [5:21:36<4:18:55, 32.83it/s]global step 490000, trans_decision ep_re 229.1681928150846

{"global_step": 490000, "eval_re": [183.9378455862397, 146.53305332246217, 
343.036448418866, 177.3151435370316, 879.344099504657, 158.51219451471937, 
140.84444227075608, 121.09061294232313, 73.2513301466166, 67.81675790717475], 
"eval_len": [93, 80, 143, 84, 293, 91, 88, 74, 71, 66]}

 50%|████▉     | 499996/1000000 [5:28:11<4:14:00, 32.81it/s]global step 500000, trans_decision ep_re 105.34903930858505

{"global_step": 500000, "eval_re": [210.72783936050132, 55.83408106917902, 
55.79361766022965, 64.5700941263454, 55.816094810899436, 76.9054927846468, 
124.95638912091621, 57.22945413244864, 287.3013443889254, 64.35598563175864], 
"eval_len": [99, 40, 42, 47, 37, 54, 88, 36, 136, 44]}

 51%|█████     | 509996/1000000 [5:34:44<4:08:32, 32.86it/s]global step 510000, trans_decision ep_re 170.33325323840714

{"global_step": 510000, "eval_re": [158.84330461374213, 192.59267084585622, 
12.01389336293839, 178.89946232456273, 217.30013673431307, 153.81602751355456, 
18.96346975763201, 665.5085924581927, 72.18447573630655, 33.21049903697315], 
"eval_len": [85, 97, 16, 95, 115, 92, 22, 227, 53, 33]}

 52%|█████▏    | 519999/1000000 [5:41:30<4:02:59, 32.92it/s]global step 520000, trans_decision ep_re 389.4094857668752

{"global_step": 520000, "eval_re": [380.69667418074323, 105.4095728621881, 
591.49687236595, 100.38225507926008, 237.24716940192994, 661.1649460713932, 
826.1373902832271, 589.5178714440652, 134.69187541701962, 267.3502305629756], 
"eval_len": [146, 81, 206, 87, 117, 233, 264, 217, 85, 112]}

 53%|█████▎    | 529997/1000000 [5:47:52<3:57:02, 33.05it/s]global step 530000, trans_decision ep_re 269.35422489153416

{"global_step": 530000, "eval_re": [21.10892959872038, 54.16781550979866, 
1176.45706804313, 714.5835287661344, 270.1509216409988, 24.94586588560907, 
32.5550543720626, 164.48250922873748, 185.85765440000716, 49.23290147014332], 
"eval_len": [25, 53, 421, 291, 127, 28, 38, 84, 96, 45]}

 54%|█████▍    | 539996/1000000 [5:54:27<3:52:47, 32.93it/s]global step 540000, trans_decision ep_re 215.2561806657896

{"global_step": 540000, "eval_re": [32.743566890553524, 98.16363772244245, 
86.2812780841955, 614.3902287966125, 14.186796389369121, 64.011415760952, 
166.6496562335539, 315.55808395670846, 697.8586461089171, 62.71849671459131], 
"eval_len": [37, 62, 68, 216, 20, 55, 94, 137, 252, 59]}

 55%|█████▍    | 549999/1000000 [6:01:00<3:48:39, 32.80it/s]global step 550000, trans_decision ep_re 189.33549987440256

{"global_step": 550000, "eval_re": [17.04989547653585, 26.187347452687554, 
38.75846390180039, 25.199568109055175, 911.7169660181042, 49.38472979259062, 
24.749727567776244, 13.032615642644963, 745.2728362316789, 42.002848551151665], 
"eval_len": [18, 32, 35, 31, 319, 41, 29, 16, 238, 37]}

 56%|█████▌    | 559998/1000000 [6:07:33<3:42:24, 32.97it/s]global step 560000, trans_decision ep_re 133.58615586178152

{"global_step": 560000, "eval_re": [18.856892497959286, 55.41540700724865, 
169.03264757075635, 75.40287192048704, 76.13454716137186, 821.7523675904931, 
17.955996806520403, 25.33174842167914, 55.90055044491219, 20.07852919638722], 
"eval_len": [20, 56, 101, 63, 74, 258, 20, 25, 60, 22]}

 57%|█████▋    | 569997/1000000 [6:14:06<3:38:28, 32.80it/s]global step 570000, trans_decision ep_re 134.80922739750994

{"global_step": 570000, "eval_re": [432.7126724295937, 85.5104757479889, 
39.479179828770135, 78.66577243541367, 33.234262945569405, 274.11929754550994, 
222.48942196963785, 36.49115822013321, 97.01996359154145, 48.3700692609412], 
"eval_len": [165, 57, 47, 65, 38, 122, 107, 38, 64, 51]}

 58%|█████▊    | 579996/1000000 [6:20:38<3:32:43, 32.91it/s]global step 580000, trans_decision ep_re 212.1780854863166

{"global_step": 580000, "eval_re": [92.00939124358268, 1133.4414468141642, 
128.99545806458056, 164.5814398520187, 58.53666359355973, 119.12908670874317, 
86.68851202606773, 114.80435067945655, 121.6775952544191, 101.91691062657348], 
"eval_len": [71, 387, 83, 94, 42, 75, 70, 65, 78, 68]}

 59%|█████▉    | 589999/1000000 [6:27:12<3:26:36, 33.07it/s]global step 590000, trans_decision ep_re 144.0361961656268

{"global_step": 590000, "eval_re": [170.38001318219216, 26.818273931683564, 
21.524076210313794, 32.874508044469614, 33.90380095123766, 284.42346645932486, 
32.180421469511806, 789.8904602395207, 22.16486950325064, 26.202071664763203], 
"eval_len": [95, 32, 26, 35, 32, 121, 32, 280, 29, 33]}

 60%|█████▉    | 599999/1000000 [6:33:44<3:22:09, 32.98it/s]global step 600000, trans_decision ep_re 123.91059110518373

{"global_step": 600000, "eval_re": [215.63345644314353, 194.93546160481634, 
26.112090835142073, 256.68823175204983, 22.91176477922599, 20.58838338112722, 
24.72282374552792, 419.05576588055845, 30.130224513759167, 28.32770811648665], 
"eval_len": [114, 103, 33, 111, 26, 27, 30, 173, 30, 33]}

 61%|██████    | 609999/1000000 [6:40:17<3:17:58, 32.83it/s]global step 610000, trans_decision ep_re 184.67171688405332

{"global_step": 610000, "eval_re": [591.0676074823751, 155.11579973045485, 
58.99797139102946, 39.941861988435726, 331.9076063270992, 151.4558438558897, 
300.4896271944362, 27.27792806987544, 29.805280304193595, 160.65764249674413], 
"eval_len": [193, 103, 56, 41, 139, 88, 136, 31, 31, 94]}

 62%|██████▏   | 619998/1000000 [6:47:00<3:08:24, 33.62it/s]global step 620000, trans_decision ep_re 118.91333562000139

{"global_step": 620000, "eval_re": [24.12847447130037, 24.685568402966904, 
32.618067405962236, 19.623010667967748, 26.091141581188218, 18.172826901783477, 
558.7790174572449, 451.8294957864504, 12.93779467502646, 20.267958850122938], 
"eval_len": [32, 29, 33, 25, 28, 24, 219, 178, 19, 26]}

 63%|██████▎   | 629999/1000000 [6:53:22<3:06:45, 33.02it/s]global step 630000, trans_decision ep_re 178.79472028336585

{"global_step": 630000, "eval_re": [75.83747290836482, 57.24600186778466, 
131.93426031720307, 171.3391098618867, 438.0397186319527, 225.38166356082172, 
264.4068941463866, 59.07544359419084, 62.76540495306748, 301.92123299199983], 
"eval_len": [60, 43, 86, 91, 179, 111, 115, 44, 39, 124]}

 64%|██████▍   | 639998/1000000 [6:59:55<3:02:03, 32.96it/s]global step 640000, trans_decision ep_re 183.62913416654663

{"global_step": 640000, "eval_re": [32.66720737990191, 25.203666962332935, 
175.04268910536567, 724.675889558369, 179.4449644553821, 38.05670997534176, 
185.1482989728914, 18.97319946437427, 34.47934482351911, 422.5993709679882], 
"eval_len": [33, 31, 89, 251, 87, 37, 96, 29, 34, 164]}

 65%|██████▍   | 649997/1000000 [7:06:28<2:56:26, 33.06it/s]global step 650000, trans_decision ep_re 203.88607992331077

{"global_step": 650000, "eval_re": [219.01994584718315, 15.1258277133417, 
546.7072797051883, 264.38055363664193, 109.77529023915542, 158.61977698369532, 
78.08994474913888, 337.9591458356366, 181.8108492630753, 127.372185260051], 
"eval_len": [103, 18, 210, 125, 74, 91, 72, 141, 95, 86]}

 66%|██████▌   | 659996/1000000 [7:13:01<2:52:31, 32.84it/s]global step 660000, trans_decision ep_re 71.66663970348876

{"global_step": 660000, "eval_re": [197.88389643473207, 67.53922853999055, 
23.0622642540788, 182.61756746954518, 83.37819330940258, 71.65133381334418, 
17.40508813085695, 27.32081199029526, 25.389613910595102, 20.418399182046816], 
"eval_len": [102, 53, 27, 87, 70, 65, 23, 34, 32, 26]}

 67%|██████▋   | 669996/1000000 [7:19:33<2:46:28, 33.04it/s]global step 670000, trans_decision ep_re 215.54168428645636

{"global_step": 670000, "eval_re": [142.73404587978803, 46.76932110103539, 
293.1685111147831, 424.7547874879101, 64.72337422299779, 158.34288804367557, 
16.36247145322757, 250.75828150011495, 441.22291826249466, 316.5802437985361], 
"eval_len": [85, 50, 132, 165, 61, 81, 23, 111, 171, 120]}

 68%|██████▊   | 679999/1000000 [7:26:07<2:42:16, 32.87it/s]global step 680000, trans_decision ep_re 86.65216401488497

{"global_step": 680000, "eval_re": [19.690420084989327, 21.888405093924398, 
319.08675763178843, 67.70988654011582, 93.37022785344021, 19.25025431068036, 
141.95800429796714, 38.78841591212517, 40.25091150984366, 104.52835691397522], 
"eval_len": [22, 28, 145, 57, 71, 28, 81, 40, 40, 70]}

 69%|██████▉   | 689999/1000000 [7:32:50<2:37:26, 32.82it/s]global step 690000, trans_decision ep_re 163.24539999036048

{"global_step": 690000, "eval_re": [145.4835365439988, 26.584692256467235, 
25.548158099521146, 408.0631059114845, 57.5190573574896, 66.65654485386314, 
37.805960703999354, 56.29951414586497, 588.168309983985, 220.32512004693103], 
"eval_len": [75, 31, 35, 170, 44, 55, 34, 54, 210, 112]}

 70%|██████▉   | 699999/1000000 [7:39:13<2:31:02, 33.10it/s]global step 700000, trans_decision ep_re 201.08239567612816

{"global_step": 700000, "eval_re": [14.708889260236534, 18.581580528078007, 
420.3962145745919, 50.83213507610723, 338.9552990896076, 347.9292435751275, 
127.14144812344007, 124.04182207812894, 261.2295063237441, 307.00781813221965], 
"eval_len": [17, 27, 159, 47, 147, 150, 83, 67, 109, 134]}

 71%|███████   | 709998/1000000 [7:46:00<2:26:52, 32.91it/s]global step 710000, trans_decision ep_re 273.95784680358673

{"global_step": 710000, "eval_re": [342.5550430793411, 150.09432494669952, 
200.73473614624757, 248.48623766205205, 241.72212536330633, 446.23216151082516, 
508.9676455373095, 191.23988408056198, 258.1556778213205, 151.39063188820353], 
"eval_len": [125, 88, 104, 122, 114, 172, 183, 100, 123, 79]}

 72%|███████▏  | 719997/1000000 [7:52:23<2:22:05, 32.84it/s]global step 720000, trans_decision ep_re 287.2264646986825

{"global_step": 720000, "eval_re": [358.32921994301756, 287.59331080807164, 
546.6753225756279, 555.0581420888831, 155.81205538397614, 219.49645168413164, 
275.0327308297059, 289.51963888581, 150.45854375148497, 34.289231036116725], 
"eval_len": [153, 120, 204, 204, 80, 110, 125, 131, 80, 39]}

 73%|███████▎  | 729996/1000000 [7:58:58<2:17:20, 32.77it/s]global step 730000, trans_decision ep_re 113.73677032062214

{"global_step": 730000, "eval_re": [16.73346556323364, 42.46596891833945, 
284.8425888882804, 63.18161603497446, 185.5408959846607, 50.91815060543452, 
38.579623922141145, 148.07622710917445, 111.20463351173552, 195.82453266824723],
"eval_len": [19, 38, 130, 52, 91, 41, 41, 91, 82, 101]}

 74%|███████▍  | 739999/1000000 [8:05:32<2:12:06, 32.80it/s]global step 740000, trans_decision ep_re 167.92829815951663

{"global_step": 740000, "eval_re": [166.1342671957278, 39.16059637693736, 
371.74969747033265, 21.67084981280989, 15.276293136722638, 157.31781938508914, 
158.097451313092, 145.95112746575631, 380.39668221459004, 223.52819722410845], 
"eval_len": [84, 37, 162, 32, 20, 87, 80, 82, 147, 107]}

 75%|███████▍  | 749998/1000000 [8:12:20<2:06:16, 33.00it/s]global step 750000, trans_decision ep_re 377.47272130371755

{"global_step": 750000, "eval_re": [320.27411314367276, 212.05942313571677, 
459.07240404235904, 465.3293907173538, 566.5391586609735, 247.50030194318862, 
32.30175004572219, 1219.6341180780225, 86.36341690665026, 165.65313636351624], 
"eval_len": [141, 100, 180, 188, 201, 116, 38, 395, 66, 88]}

 76%|███████▌  | 759997/1000000 [8:18:45<2:02:55, 32.54it/s]global step 760000, trans_decision ep_re 224.99353963620143

{"global_step": 760000, "eval_re": [178.1873717533784, 603.4479239820517, 
460.0643354757633, 194.1477967002617, 208.88303835149202, 169.32897577510403, 
37.10862548046894, 37.89691398098492, 184.5078778755049, 176.36253698700406], 
"eval_len": [85, 222, 181, 99, 106, 85, 43, 37, 88, 93]}

 77%|███████▋  | 769996/1000000 [8:25:21<1:56:29, 32.91it/s]global step 770000, trans_decision ep_re 113.2056965694884

{"global_step": 770000, "eval_re": [32.96232707050009, 14.845349070712476, 
183.94576516783619, 54.1023869743538, 28.887868164295888, 56.18484766802069, 
402.596700114053, 307.30525000558526, 29.949805943419978, 21.276665516106537], 
"eval_len": [36, 17, 93, 52, 31, 58, 155, 135, 33, 26]}

 78%|███████▊  | 779999/1000000 [8:31:55<1:50:51, 33.08it/s]global step 780000, trans_decision ep_re 234.58186801300954

{"global_step": 780000, "eval_re": [312.74146006984404, 332.1678692181038, 
371.26326719213233, 27.70176069144169, 346.49353569290554, 21.67097690205061, 
669.3037977872985, 35.75870185296682, 171.1287547169676, 57.588556006384195], 
"eval_len": [122, 138, 144, 30, 136, 28, 232, 37, 84, 49]}

 79%|███████▉  | 789998/1000000 [8:38:40<1:46:33, 32.85it/s]global step 790000, trans_decision ep_re 140.88663551953783

{"global_step": 790000, "eval_re": [278.49229858180837, 27.474234256224356, 
163.09787821958096, 32.702486719265316, 13.340771051949128, 23.260322444766896, 
45.10236325891926, 27.502777627786127, 211.71500085525662, 586.1782221798211], 
"eval_len": [116, 30, 88, 33, 15, 31, 46, 32, 104, 217]}

 80%|███████▉  | 799999/1000000 [8:45:04<1:41:16, 32.91it/s]global step 800000, trans_decision ep_re 100.77804785259308

{"global_step": 800000, "eval_re": [44.06356935974915, 16.827196078868973, 
77.80858927538203, 13.276912967350507, 548.4680591192441, 14.816932294343442, 
34.294057332934216, 22.27331752017013, 74.24459499427492, 161.70724958361322], 
"eval_len": [46, 18, 64, 17, 199, 17, 46, 30, 58, 93]}

 81%|████████  | 809999/1000000 [8:51:50<1:36:45, 32.73it/s]global step 810000, trans_decision ep_re 220.58105716603126

{"global_step": 810000, "eval_re": [43.36525533069705, 373.90965837566347, 
237.5058010915039, 44.874765863028095, 76.09667549818751, 170.53847553296885, 
76.10858865245038, 697.8182307817953, 406.24112167885767, 79.35199885516043], 
"eval_len": [47, 161, 113, 47, 60, 98, 63, 216, 154, 66]}

 82%|████████▏ | 819999/1000000 [8:58:14<1:30:52, 33.01it/s]global step 820000, trans_decision ep_re 203.327964664853

{"global_step": 820000, "eval_re": [210.18117217016015, 257.66315995614536, 
54.62339856020955, 245.49075072502117, 177.0154649783587, 28.07601187990792, 
220.5060131693338, 200.2210733382642, 42.0659259011842, 597.4366759699446], 
"eval_len": [98, 114, 52, 126, 94, 34, 106, 103, 44, 204]}

 83%|████████▎ | 829998/1000000 [9:05:00<1:26:21, 32.81it/s]global step 830000, trans_decision ep_re 188.54531094022718

{"global_step": 830000, "eval_re": [129.60681791000954, 455.46855235606245, 
17.440435231878624, 25.923137821669503, 205.81616521521923, 249.20349622754824, 
27.30746996713449, 200.40153102669458, 237.4329979558722, 336.8525056901829], 
"eval_len": [68, 161, 25, 34, 102, 119, 31, 105, 108, 149]}

 84%|████████▍ | 839999/1000000 [9:11:25<1:20:58, 32.93it/s]global step 840000, trans_decision ep_re 191.5580777648399

{"global_step": 840000, "eval_re": [482.1424709943949, 46.930063739122986, 
43.27580610648054, 369.61801358056704, 24.932171367103354, 549.1572746975156, 
36.354131040275874, 32.10198406673274, 32.67758845346075, 298.39127360274546], 
"eval_len": [169, 56, 44, 142, 33, 190, 40, 32, 42, 126]}

 85%|████████▍ | 849998/1000000 [9:18:02<1:16:30, 32.67it/s]global step 850000, trans_decision ep_re 204.1103708899697

{"global_step": 850000, "eval_re": [28.435385548314382, 20.42262262511563, 
623.2466765817541, 27.09455499361537, 38.746638380566466, 16.03663056532163, 
395.88948102347416, 557.1969731961998, 266.6005231126841, 67.43422287265146], 
"eval_len": [32, 28, 206, 34, 33, 29, 158, 202, 127, 63]}

 86%|████████▌ | 859997/1000000 [9:24:41<1:11:25, 32.67it/s]global step 860000, trans_decision ep_re 188.93478308658203

{"global_step": 860000, "eval_re": [269.2572039747505, 204.93341579724236, 
595.866377441455, 30.579858898669666, 212.37581065355917, 39.92766856997303, 
377.06728870747037, 63.45901707587099, 55.61621017248676, 40.26497957434255], 
"eval_len": [124, 99, 208, 34, 101, 39, 153, 52, 58, 43]}

 87%|████████▋ | 869996/1000000 [9:31:17<1:05:50, 32.90it/s]global step 870000, trans_decision ep_re 192.29271163836782

{"global_step": 870000, "eval_re": [157.05669979779668, 152.4953648435817, 
621.5345961688336, 239.30853424128634, 42.48260675469393, 53.58906342288549, 
179.83069419193916, 192.92304230270804, 268.2085524743834, 15.497962185569882], 
"eval_len": [80, 83, 219, 114, 35, 51, 97, 95, 113, 19]}

 88%|████████▊ | 879999/1000000 [9:37:52<1:01:02, 32.77it/s]global step 880000, trans_decision ep_re 208.11212190424803

{"global_step": 880000, "eval_re": [37.467833343365754, 394.73270393272173, 
255.75877059600379, 159.50452605082654, 164.57218013869925, 117.57373571577874, 
201.57020611414248, 163.5467129265327, 497.54631220280993, 88.848238021599], 
"eval_len": [43, 169, 104, 87, 99, 79, 103, 77, 180, 61]}

 89%|████████▉ | 889998/1000000 [9:44:28<55:28, 33.04it/s]global step 890000, trans_decision ep_re 154.46983470549694

{"global_step": 890000, "eval_re": [48.47198297768417, 137.83439222717408, 
105.6166280170959, 297.6346791123472, 138.56590870732498, 383.09287910984466, 
165.10003077669072, 36.66362722104804, 72.00979901080325, 159.70841989495642], 
"eval_len": [51, 77, 76, 123, 86, 160, 95, 34, 52, 83]}

 90%|████████▉ | 899997/1000000 [9:51:03<50:44, 32.85it/s]global step 900000, trans_decision ep_re 57.07582867658315

{"global_step": 900000, "eval_re": [19.47913939996483, 151.0853149070686, 
64.81558007837675, 182.42760324377954, 29.098917443772667, 51.85136212178885, 
16.956354762280665, 17.27283461756932, 18.660323878617714, 19.110856312612466], 
"eval_len": [25, 104, 52, 100, 28, 50, 19, 22, 23, 25]}

 91%|█████████ | 909997/1000000 [9:57:38<46:25, 32.31it/s]global step 910000, trans_decision ep_re 138.68992161771834

{"global_step": 910000, "eval_re": [199.67959667021555, 405.72396013110915, 
23.647872684221326, 71.94874972277395, 28.87539774174965, 196.18503262179007, 
151.1971449313063, 81.45672892036984, 33.56537363637191, 194.61935911727548], 
"eval_len": [93, 153, 28, 69, 37, 106, 76, 76, 39, 104]}

 92%|█████████▏| 919996/1000000 [10:04:16<40:46, 32.70it/s]global step 920000, trans_decision ep_re 154.10276712111371

{"global_step": 920000, "eval_re": [235.8741996709784, 33.20191261632229, 
109.8273373565359, 333.0868466909943, 174.99348571302693, 71.4698039902344, 
30.237157274225297, 172.16198649270302, 161.45928552708676, 218.71565587902973],
"eval_len": [107, 35, 74, 139, 101, 62, 40, 90, 81, 112]}

 93%|█████████▎| 929999/1000000 [10:10:51<35:23, 32.96it/s]global step 930000, trans_decision ep_re 150.20450898897843

{"global_step": 930000, "eval_re": [68.33299989483119, 21.216477931012506, 
22.142497963447994, 44.21892498440209, 34.77991315172889, 247.428073376226, 
34.38828271748224, 55.10213077238132, 372.8757149337145, 601.5600741645575], 
"eval_len": [58, 21, 24, 48, 38, 109, 37, 48, 151, 216]}

 94%|█████████▍| 939999/1000000 [10:17:26<30:11, 33.13it/s]global step 940000, trans_decision ep_re 204.50546661161326

{"global_step": 940000, "eval_re": [273.00743855750795, 297.38170421505015, 
139.1626427844228, 38.31167282549652, 194.38164163808466, 158.35213952589658, 
13.852670604423864, 424.68908920718326, 315.2134059893921, 190.70226076867525], 
"eval_len": [111, 123, 72, 44, 92, 100, 18, 163, 136, 100]}

 95%|█████████▍| 949998/1000000 [10:24:01<25:20, 32.88it/s]global step 950000, trans_decision ep_re 284.19813250474226

{"global_step": 950000, "eval_re": [474.7761417349081, 64.83122709127638, 
163.26255711345024, 162.57398072105386, 438.86417029067377, 282.80256523469103, 
357.5386405499004, 242.37536964532558, 400.8240906982304, 254.13258196791256], 
"eval_len": [179, 50, 83, 81, 171, 126, 167, 114, 160, 112]}

 96%|█████████▌| 959997/1000000 [10:30:37<20:31, 32.50it/s]global step 960000, trans_decision ep_re 238.04827022020396

{"global_step": 960000, "eval_re": [190.38493395869185, 58.44993653630229, 
101.69035818816187, 737.2104967468606, 201.45017229606648, 457.7749580831247, 
61.88516953364169, 61.164277083843366, 23.670508288556587, 486.80189148678994], 
"eval_len": [95, 53, 64, 251, 96, 166, 62, 55, 26, 190]}

 97%|█████████▋| 969996/1000000 [10:37:13<15:09, 33.00it/s]global step 970000, trans_decision ep_re 254.14215863854693

{"global_step": 970000, "eval_re": [134.72361029560386, 922.050288093847, 
379.8768587856398, 218.93464890795332, 361.4711760079719, 41.782789310553696, 
110.35918384441135, 103.39611397323533, 169.03983366176885, 99.78708350448362], 
"eval_len": [72, 313, 151, 99, 148, 44, 69, 60, 94, 60]}

 98%|█████████▊| 979999/1000000 [10:43:47<10:06, 32.99it/s]global step 980000, trans_decision ep_re 281.39432186844016

{"global_step": 980000, "eval_re": [55.90670610418367, 936.6369716819074, 
53.82571272061776, 442.9755912120047, 109.45774647241498, 429.5101456273877, 
387.11978598074967, 158.3228948580963, 184.31373029149321, 55.873933735546], 
"eval_len": [60, 304, 55, 173, 64, 173, 146, 95, 96, 50]}

 99%|█████████▉| 989998/1000000 [10:50:23<05:04, 32.85it/s]global step 990000, trans_decision ep_re 207.01129977123492

{"global_step": 990000, "eval_re": [259.58721418544985, 156.9244080694305, 
370.0279608946425, 223.06176192492347, 141.050055095743, 167.11080285203576, 
164.71253828109198, 205.28779607539522, 221.68443014679391, 160.66603018684282],
"eval_len": [133, 88, 175, 107, 82, 99, 84, 108, 107, 97]}

100%|█████████▉| 999997/1000000 [10:57:10<00:00, 32.93it/s]global step 1000000, trans_decision ep_re 234.01616687941765

{"global_step": 1000000, "eval_re": [156.0609021836812, 251.43349938672833, 
296.79468591077904, 409.36992289874485, 155.16966805897874, 286.6861318141182, 
146.01139689036333, 240.62192650730367, 234.9580916782298, 163.055443465249], 
"eval_len": [97, 114, 130, 163, 88, 121, 87, 122, 108, 95]}

100%|██████████| 1000000/1000000 [10:57:11<00:00, 25.36it/s]
