
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.1
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9999/1000000 [04:03<9:45:36, 28.18it/s]global step 10000, trans_decision ep_re 264.30822640130634

{"global_step": 10000, "eval_re": [241.07190501269594, 331.5957179933006, 
215.73881249911642, 330.58870235381676, 241.68022005420164, 299.47111693121815, 
147.2979188313539, 262.84644238896504, 206.9292795794744, 365.8621483689199], 
"eval_len": [48, 60, 44, 61, 47, 57, 28, 51, 43, 65]}

  2%|▏         | 19999/1000000 [12:00<9:44:14, 27.96it/s]global step 20000, trans_decision ep_re 301.78135009510964

{"global_step": 20000, "eval_re": [304.362190145773, 279.8540452464449, 
453.3100881824941, 280.2366633016033, 258.31770148472833, 290.1493434815593, 
312.3366896958425, 131.41022230776457, 404.69272763983844, 303.14382946504827], 
"eval_len": [57, 54, 87, 53, 50, 56, 58, 25, 77, 58]}

  3%|▎         | 29997/1000000 [20:10<9:46:46, 27.55it/s]global step 30000, trans_decision ep_re 278.6843853117151

{"global_step": 30000, "eval_re": [280.6554944055348, 296.7988845744232, 
324.0312633578161, 285.8382670231539, 348.8419333799898, 275.8211579402421, 
96.22361021821595, 313.97997123554023, 308.0465534321977, 256.60671755003716], 
"eval_len": [53, 56, 59, 53, 65, 52, 19, 61, 57, 49]}

  4%|▍         | 39997/1000000 [28:10<9:41:23, 27.52it/s]global step 40000, trans_decision ep_re 335.8845991682096

{"global_step": 40000, "eval_re": [152.74897285683005, 327.1037667664443, 
372.90441509620865, 162.53645342824407, 338.2055591719885, 252.13561506375308, 
303.93666764258865, 750.8630051879567, 386.55352597937343, 311.8580104887091], 
"eval_len": [29, 60, 72, 31, 63, 48, 57, 142, 72, 60]}

  5%|▍         | 49998/1000000 [35:55<9:37:37, 27.41it/s]global step 50000, trans_decision ep_re 348.6595434766757

{"global_step": 50000, "eval_re": [417.84397634485845, 345.12513712143084, 
331.23465785136455, 367.95284607409036, 394.189718141027, 114.04215206107831, 
409.59133124502586, 255.48497461580823, 350.681655225225, 500.448986086849], 
"eval_len": [77, 64, 61, 66, 74, 22, 76, 50, 64, 93]}

  6%|▌         | 59997/1000000 [43:54<9:24:56, 27.73it/s]global step 60000, trans_decision ep_re 361.49098985910575

{"global_step": 60000, "eval_re": [320.3399348589574, 418.21074356564185, 
448.16882560983566, 316.76485349508374, 371.52556253824474, 394.1115636849378, 
355.1042219540575, 330.4039275775571, 286.4729904856097, 373.80727482113195], 
"eval_len": [60, 77, 81, 58, 68, 81, 65, 60, 57, 69]}

  7%|▋         | 69997/1000000 [51:53<9:39:17, 26.76it/s]global step 70000, trans_decision ep_re 318.4641868720379

{"global_step": 70000, "eval_re": [413.73072844810685, 278.8996639214881, 
297.1649187323264, 303.1601938096309, 274.61426533696283, 371.69552698183895, 
411.7770291339741, 152.47542197896524, 119.41927594250896, 561.7048444345758], 
"eval_len": [81, 52, 55, 57, 51, 72, 76, 29, 23, 103]}

  8%|▊         | 79999/1000000 [59:51<9:26:33, 27.06it/s]global step 80000, trans_decision ep_re 320.0224156313327

{"global_step": 80000, "eval_re": [315.0478965782854, 449.24093577205207, 
438.63472395699216, 359.20584881531056, 387.9607041561768, 123.97216516503224, 
356.9079980845366, 157.46211507937818, 285.42053368556606, 326.37123501999685], 
"eval_len": [56, 86, 79, 66, 71, 24, 65, 30, 53, 60]}

  9%|▉         | 89999/1000000 [1:07:51<9:17:27, 27.21it/s]global step 90000, trans_decision ep_re 318.97716159583246

{"global_step": 90000, "eval_re": [444.8563011505217, 163.73187249131374, 
375.13260968423924, 152.26649478936122, 566.8542429754651, 342.39734847493605, 
449.2269250798748, 397.5992733517024, 119.32762275339525, 178.37892520751524], 
"eval_len": [82, 31, 70, 29, 108, 62, 86, 73, 23, 34]}

 10%|▉         | 99997/1000000 [1:15:50<9:18:21, 26.86it/s]global step 100000, trans_decision ep_re 312.0909002790703

{"global_step": 100000, "eval_re": [114.13407253117136, 322.22005809968044, 
166.97421105283763, 376.37662521104704, 102.74417084834674, 315.8813558359808, 
481.71860731865644, 292.1471284391744, 355.4545856739585, 593.2581877798495], 
"eval_len": [22, 60, 32, 68, 20, 57, 104, 55, 65, 112]}

 11%|█         | 109998/1000000 [1:24:00<8:47:39, 28.11it/s]global step 110000, trans_decision ep_re 343.35298863917666

{"global_step": 110000, "eval_re": [402.49141721164176, 385.82244078369774, 
595.3227869429043, 348.5534466904757, 197.7076518481864, 384.84020147300623, 
410.001467704746, 261.1939446389883, 300.4430961152085, 147.15343298291148], 
"eval_len": [74, 75, 111, 71, 37, 71, 76, 48, 56, 28]}

 12%|█▏        | 119997/1000000 [1:32:00<8:51:02, 27.62it/s]global step 120000, trans_decision ep_re 312.55640834193963

{"global_step": 120000, "eval_re": [342.67851067827297, 289.6512818695503, 
296.63990273820673, 152.11501552623557, 493.0818022121296, 301.6994248473102, 
261.52281165024266, 268.7875107457471, 382.8339275550201, 336.55389559668123], 
"eval_len": [62, 54, 55, 29, 92, 55, 49, 50, 71, 63]}

 13%|█▎        | 129998/1000000 [1:39:50<8:51:43, 27.27it/s]global step 130000, trans_decision ep_re 258.3501996891736

{"global_step": 130000, "eval_re": [383.02638885217726, 407.0079839940658, 
146.83704080257516, 372.83398021869533, 398.56980597974774, 174.53567592620504, 
174.87045396991763, 136.0451428365041, 158.18390812083084, 231.59161619101755], 
"eval_len": [70, 75, 28, 68, 72, 33, 33, 26, 30, 43]}

 14%|█▍        | 139999/1000000 [1:48:00<8:43:38, 27.37it/s]global step 140000, trans_decision ep_re 310.18612071866943

{"global_step": 140000, "eval_re": [381.514843489645, 114.63109325917274, 
129.848684667293, 607.4882802403112, 399.56762335900714, 141.56082981388275, 
363.7457200490748, 332.84201894815914, 120.2488977739551, 510.41321558619364], 
"eval_len": [70, 22, 25, 115, 75, 27, 67, 63, 23, 98]}

 15%|█▍        | 149997/1000000 [1:56:00<8:34:58, 27.51it/s]global step 150000, trans_decision ep_re 318.6835406356329

{"global_step": 150000, "eval_re": [293.3101474964926, 280.12467884154813, 
332.6364769846233, 377.57849159535334, 363.8687291124582, 136.64387796528996, 
322.22352013419635, 373.73997301992017, 356.6598435520533, 350.0496676543935], 
"eval_len": [55, 56, 62, 69, 67, 26, 59, 70, 66, 65]}

 16%|█▌        | 159997/1000000 [2:03:52<8:44:41, 26.68it/s]global step 160000, trans_decision ep_re 296.06861584762345

{"global_step": 160000, "eval_re": [390.5815656114594, 297.4362025328987, 
400.85160294416863, 119.73402333441706, 362.0519880749588, 299.48031773839557, 
146.06671648047148, 386.81790158831774, 274.62160219562844, 283.04423797551874],
"eval_len": [72, 56, 75, 23, 69, 57, 28, 71, 53, 53]}

 17%|█▋        | 169998/1000000 [2:11:52<8:23:53, 27.45it/s]global step 170000, trans_decision ep_re 317.9119140050401

{"global_step": 170000, "eval_re": [455.2234127611907, 308.0619477802101, 
142.03840186097935, 459.0985600118734, 343.02015281303466, 113.8265375674589, 
360.4143656936598, 175.61610447724806, 519.6955292061431, 302.12412787860313], 
"eval_len": [84, 57, 27, 85, 63, 22, 64, 33, 108, 56]}

 18%|█▊        | 179999/1000000 [2:20:10<8:21:43, 27.24it/s]global step 180000, trans_decision ep_re 320.39047791764074

{"global_step": 180000, "eval_re": [108.37694035940234, 335.44630228873257, 
279.5434252414343, 387.5520568370731, 332.71572293877085, 317.3721841378592, 
267.02585535931263, 367.8406996372271, 365.6514343518326, 442.38015802476264], 
"eval_len": [21, 64, 52, 73, 61, 61, 51, 67, 67, 82]}

 19%|█▉        | 189999/1000000 [2:28:03<8:27:24, 26.61it/s]global step 190000, trans_decision ep_re 293.4259402622943

{"global_step": 190000, "eval_re": [402.9501103559374, 298.6805202014572, 
141.03060112241997, 286.0847217753936, 444.58672903089666, 355.6795412190307, 
151.67576146861074, 142.15170372058256, 345.17374545939555, 366.2459682692186], 
"eval_len": [73, 56, 27, 55, 81, 67, 29, 27, 63, 66]}

 20%|█▉        | 199997/1000000 [2:36:20<8:33:24, 25.97it/s]global step 200000, trans_decision ep_re 284.47352954889413

{"global_step": 200000, "eval_re": [247.5645347204263, 401.25803213757075, 
326.59645352943414, 255.94230702775522, 340.707277766154, 114.33832835540758, 
343.5679312942917, 195.5123806693809, 350.2790300819484, 268.9690199065721], 
"eval_len": [50, 74, 61, 50, 63, 22, 64, 37, 63, 52]}

 21%|██        | 209997/1000000 [2:44:14<8:09:24, 26.90it/s]global step 210000, trans_decision ep_re 320.70911275930314

{"global_step": 210000, "eval_re": [265.37472553801643, 119.73106905080417, 
538.4805067712189, 315.57378665712923, 349.84271863580864, 293.1341226026691, 
325.82095418902827, 410.17208057977257, 317.0179352555121, 271.94322831307244], 
"eval_len": [49, 23, 98, 59, 65, 54, 60, 76, 60, 53]}

 22%|██▏       | 219997/1000000 [2:52:20<7:55:45, 27.32it/s]global step 220000, trans_decision ep_re 271.4714586310678

{"global_step": 220000, "eval_re": [323.88057070014645, 385.05208884795996, 
330.939562956896, 180.77087089083068, 307.5914594756544, 329.4647360257319, 
146.5269126955403, 271.8718671829663, 286.69330881738256, 151.92320871756957], 
"eval_len": [60, 71, 63, 34, 57, 61, 28, 54, 54, 29]}

 23%|██▎       | 229997/1000000 [3:00:40<8:02:44, 26.58it/s]global step 230000, trans_decision ep_re 305.54470231757375

{"global_step": 230000, "eval_re": [492.5798617355832, 239.67801109317656, 
269.6062064029995, 136.50184850754684, 331.9418123866868, 304.65650842027975, 
399.68486210465835, 306.1460685698348, 309.8570175750269, 264.79482637994494], 
"eval_len": [103, 44, 53, 26, 60, 58, 72, 60, 60, 51]}

 24%|██▍       | 239998/1000000 [3:08:31<7:34:44, 27.85it/s]global step 240000, trans_decision ep_re 272.5046506911825

{"global_step": 240000, "eval_re": [209.09190754459834, 188.1644215870171, 
371.56106092672354, 376.78419404211724, 152.91234146477294, 308.1056342796124, 
145.75745079686314, 443.4876974566802, 261.39305957510703, 267.7887392383331], 
"eval_len": [39, 36, 69, 69, 29, 58, 28, 81, 50, 50]}

 25%|██▍       | 249997/1000000 [3:16:36<7:38:52, 27.24it/s]global step 250000, trans_decision ep_re 295.02540422533275

{"global_step": 250000, "eval_re": [146.7069969395266, 275.86229588366666, 
337.3962561750646, 156.90557483905783, 410.0202826539343, 425.1198399186924, 
292.36572123254973, 318.0631559709287, 279.1787066483487, 308.63521199155826], 
"eval_len": [28, 53, 62, 30, 75, 78, 55, 60, 52, 59]}

 26%|██▌       | 259997/1000000 [3:24:42<7:44:01, 26.58it/s]global step 260000, trans_decision ep_re 311.728629056519

{"global_step": 260000, "eval_re": [310.76801351090944, 297.8152386181656, 
150.9130833944063, 285.62106996591353, 432.1396929838734, 386.3761800367587, 
378.76959946743943, 384.90343329748924, 135.37010329356985, 354.6098759966643], 
"eval_len": [58, 58, 29, 53, 81, 72, 71, 71, 26, 68]}

 27%|██▋       | 269999/1000000 [3:33:00<7:41:41, 26.35it/s]global step 270000, trans_decision ep_re 258.3410628400704

{"global_step": 270000, "eval_re": [269.0548444968295, 367.365574294212, 
266.53989719119284, 421.0026277824282, 140.95479311238867, 300.7158006131203, 
173.61948184361614, 168.5490651888431, 202.90611956611917, 272.70242431195373], 
"eval_len": [51, 68, 51, 81, 27, 56, 33, 32, 38, 52]}

 28%|██▊       | 279998/1000000 [3:40:52<7:29:42, 26.68it/s]global step 280000, trans_decision ep_re 384.5973558498201

{"global_step": 280000, "eval_re": [349.0382370355768, 429.8558361996003, 
376.11191766477, 479.4965373247846, 409.8945309402931, 367.0314758435606, 
309.00864678093745, 500.3598585018445, 450.3748037901499, 174.80171441668392], 
"eval_len": [66, 77, 68, 95, 79, 67, 56, 104, 87, 33]}

 29%|██▉       | 289998/1000000 [3:49:01<7:10:28, 27.49it/s]global step 290000, trans_decision ep_re 348.5391512365533

{"global_step": 290000, "eval_re": [422.65472804541963, 347.48259158821116, 
321.7843987449721, 384.16921313765863, 335.54793165993715, 566.8207125866835, 
344.9395596339142, 529.707989442439, 136.07108625993268, 96.21330126636525], 
"eval_len": [79, 64, 59, 73, 64, 110, 63, 111, 26, 19]}

 30%|██▉       | 299999/1000000 [3:57:20<7:07:34, 27.29it/s]global step 300000, trans_decision ep_re 303.15435539662406

{"global_step": 300000, "eval_re": [343.6930013356132, 284.13802709460316, 
422.26567850861954, 385.0773851873223, 271.168517706076, 326.89705701913385, 
152.24415638413922, 407.5735457675524, 316.2842269181622, 122.2019580450192], 
"eval_len": [62, 54, 77, 72, 51, 61, 29, 75, 58, 24]}

 31%|███       | 309999/1000000 [4:05:30<7:05:24, 27.03it/s]global step 310000, trans_decision ep_re 319.92840299973943

{"global_step": 310000, "eval_re": [677.8232075545125, 136.08223079775001, 
243.43031305359528, 276.42002055267255, 244.02034572175435, 365.62640629668215, 
356.1483087687029, 444.33789308280876, 113.7283588947003, 341.66694527421566], 
"eval_len": [123, 26, 49, 52, 50, 68, 64, 82, 22, 62]}

 32%|███▏      | 319999/1000000 [4:13:23<7:13:42, 26.13it/s]global step 320000, trans_decision ep_re 360.42736153631915

{"global_step": 320000, "eval_re": [304.18561054751916, 378.69081168255343, 
317.09542342367916, 307.8340581713779, 334.99466591259977, 319.0095018150531, 
670.6426655314995, 311.53351164489027, 316.113186926826, 344.1741797071936], 
"eval_len": [55, 70, 58, 57, 61, 63, 118, 60, 58, 63]}

 33%|███▎      | 329999/1000000 [4:21:40<6:51:49, 27.12it/s]global step 330000, trans_decision ep_re 296.55158371167647

{"global_step": 330000, "eval_re": [142.11128577702942, 308.00318142783976, 
267.6635882835928, 283.0458938048784, 364.2889484532831, 391.94188051440955, 
146.531358782873, 353.8210993131996, 415.72933651861405, 292.3792642410445], 
"eval_len": [27, 57, 51, 54, 70, 74, 28, 63, 77, 55]}

 34%|███▍      | 339999/1000000 [4:29:35<6:49:22, 26.87it/s]global step 340000, trans_decision ep_re 326.30434586228023

{"global_step": 340000, "eval_re": [375.9301010147319, 318.3363322059092, 
325.00933896232317, 279.34166564865177, 295.32039027509, 448.6186644751356, 
316.0520658536777, 365.49576690805765, 196.1280659283299, 342.81106735089514], 
"eval_len": [68, 60, 60, 55, 54, 83, 59, 66, 37, 63]}

 35%|███▍      | 349997/1000000 [4:37:41<6:37:51, 27.23it/s]global step 350000, trans_decision ep_re 328.84050600800936

{"global_step": 350000, "eval_re": [442.685354380472, 379.12919475048636, 
238.818405947812, 531.5837101739116, 234.38189400954258, 251.478921121052, 
200.4811935017346, 358.46834005373233, 367.0376228749663, 284.3404232663839], 
"eval_len": [79, 68, 48, 96, 47, 48, 38, 66, 69, 55]}

 36%|███▌      | 359997/1000000 [4:46:00<6:26:47, 27.58it/s]global step 360000, trans_decision ep_re 282.06463720210905

{"global_step": 360000, "eval_re": [342.9411686817068, 235.51914277902793, 
309.1943809634787, 321.5311744326266, 465.35325255038896, 156.02257695162652, 
146.19875309373256, 299.5617111049192, 147.58328398056034, 396.7409274830228], 
"eval_len": [63, 47, 61, 61, 82, 30, 28, 57, 28, 73]}

 37%|███▋      | 369998/1000000 [4:54:00<6:21:43, 27.51it/s]global step 370000, trans_decision ep_re 243.27694064988842

{"global_step": 370000, "eval_re": [400.76177286160805, 114.233606313569, 
109.02522862441083, 207.99889187602437, 309.3985346648492, 314.6634020636421, 
185.76826118848513, 344.1526090705203, 310.8534840885912, 135.91361574718402], 
"eval_len": [74, 22, 21, 41, 59, 59, 35, 65, 59, 26]}

 38%|███▊      | 379997/1000000 [5:01:51<6:23:19, 26.96it/s]global step 380000, trans_decision ep_re 321.9710650701303

{"global_step": 380000, "eval_re": [321.81257950845355, 304.4951503534467, 
355.84558012320986, 324.82273118925525, 556.4163556686046, 496.92746306702713, 
150.87149588260354, 253.34000866394453, 314.49943693713703, 140.67984930762032],
"eval_len": [60, 57, 64, 59, 116, 89, 29, 50, 60, 27]}

 39%|███▉      | 389998/1000000 [5:09:53<6:04:19, 27.91it/s]global step 390000, trans_decision ep_re 312.3942634347243

{"global_step": 390000, "eval_re": [359.29391219716825, 423.2500344835193, 
406.2132587198326, 108.58095687796767, 324.07195903496273, 292.271953287947, 
372.9766868994845, 274.65760220721, 448.2768483440934, 114.34942229505707], 
"eval_len": [67, 77, 74, 21, 58, 58, 72, 52, 81, 22]}

 40%|███▉      | 399999/1000000 [5:17:54<6:07:01, 27.25it/s]global step 400000, trans_decision ep_re 316.42846489608144

{"global_step": 400000, "eval_re": [338.22560133269565, 402.33215207486796, 
357.8616830511327, 123.18386137828996, 413.06756885284045, 274.91651795405045, 
328.71825400545947, 144.75749301579953, 419.7018575941393, 361.5196597015394], 
"eval_len": [62, 75, 65, 24, 71, 53, 61, 28, 79, 71]}

 41%|████      | 409999/1000000 [5:25:54<6:11:13, 26.49it/s]global step 410000, trans_decision ep_re 289.0808302685818

{"global_step": 410000, "eval_re": [255.95838587849514, 313.73926171595014, 
307.0327385947287, 313.0372629992833, 246.2000513449918, 304.0313127975861, 
353.47179390812494, 312.02438278211304, 312.39305537393733, 172.92005729060728],
"eval_len": [49, 60, 59, 60, 47, 58, 64, 56, 59, 33]}

 42%|████▏     | 419997/1000000 [5:33:53<5:48:46, 27.72it/s]global step 420000, trans_decision ep_re 323.2412820898586

{"global_step": 420000, "eval_re": [130.72450294621092, 266.793848904759, 
302.75447311084565, 226.94524269779927, 222.6996572123207, 544.1814937497188, 
266.28848670159994, 727.2437618295086, 297.51568271916824, 247.26567102665445], 
"eval_len": [25, 53, 60, 45, 45, 107, 49, 128, 56, 46]}

 43%|████▎     | 429997/1000000 [5:41:56<5:52:59, 26.91it/s]global step 430000, trans_decision ep_re 323.18575802490284

{"global_step": 430000, "eval_re": [322.98871718838654, 426.146289590684, 
244.13304982738404, 232.27927024275988, 371.4196241675252, 156.99485836198338, 
349.51166831663045, 326.22275275545724, 469.39685108772676, 332.76449871049124],
"eval_len": [58, 83, 49, 44, 69, 30, 64, 60, 90, 60]}

 44%|████▍     | 439997/1000000 [5:50:10<5:39:44, 27.47it/s]global step 440000, trans_decision ep_re 318.21827594790574

{"global_step": 440000, "eval_re": [838.281615647067, 303.91936763007965, 
346.5611948514396, 233.9925472690759, 273.37540172471745, 264.1763802448198, 
204.16671477282296, 345.39626597504656, 96.8942262827496, 275.419045081239], 
"eval_len": [156, 56, 65, 44, 53, 52, 39, 62, 19, 53]}

 45%|████▍     | 449998/1000000 [5:57:57<5:29:58, 27.78it/s]global step 450000, trans_decision ep_re 245.39995576678766

{"global_step": 450000, "eval_re": [213.19595364174396, 185.01545211598938, 
247.4653652936906, 158.4522639902352, 258.2559858995595, 346.6095033333534, 
278.7546210442726, 323.4047196204699, 276.07352138487056, 166.77217134369147], 
"eval_len": [44, 35, 50, 30, 50, 67, 54, 58, 53, 32]}

 46%|████▌     | 459998/1000000 [6:05:55<5:15:32, 28.52it/s]global step 460000, trans_decision ep_re 348.22518756356203

{"global_step": 460000, "eval_re": [253.7066926018267, 612.7786998658656, 
280.64493561490224, 270.88814660555767, 281.28080827566185, 309.2555699138997, 
797.239179135216, 131.1249124144842, 135.52525341526774, 409.80767779293797], 
"eval_len": [50, 109, 54, 52, 53, 60, 167, 25, 26, 74]}

 47%|████▋     | 469997/1000000 [6:14:10<5:30:18, 26.74it/s]global step 470000, trans_decision ep_re 265.1999405448341

{"global_step": 470000, "eval_re": [271.4566399772268, 108.63115265400181, 
244.9116494541078, 252.95914501902402, 249.8482959472947, 404.7205500340266, 
231.8545069558864, 102.65868661924085, 671.8643708858526, 113.09440790167942], 
"eval_len": [53, 21, 47, 50, 49, 70, 46, 20, 123, 22]}

 48%|████▊     | 479997/1000000 [6:22:10<5:26:42, 26.53it/s]global step 480000, trans_decision ep_re 361.5094543390771

{"global_step": 480000, "eval_re": [1062.157561366372, 211.85745503147515, 
283.232040521752, 229.72447673022444, 388.49648824848407, 257.7585856833321, 
262.76309598544486, 259.90772879401646, 507.8762746351075, 151.32083639456235], 
"eval_len": [219, 43, 54, 45, 72, 49, 51, 51, 91, 29]}

 49%|████▉     | 489997/1000000 [6:29:56<5:06:10, 27.76it/s]global step 490000, trans_decision ep_re 240.22672641208095

{"global_step": 490000, "eval_re": [244.83335018026665, 238.85970752216144, 
222.90049323617575, 400.5677957087321, 391.65501512528283, 141.75797913360148, 
113.87578092843947, 245.3895944291853, 157.65152664149878, 244.77602121546565], 
"eval_len": [47, 47, 42, 72, 68, 27, 22, 48, 30, 48]}

 50%|████▉     | 499999/1000000 [6:37:54<5:00:52, 27.70it/s]global step 500000, trans_decision ep_re 366.41169208679514

{"global_step": 500000, "eval_re": [183.74826070689633, 300.4104743082511, 
576.1782331025516, 299.5258622813146, 217.451735852475, 399.9298062911629, 
354.10347038021524, 237.70319154620728, 359.0166286735713, 736.0492577253061], 
"eval_len": [35, 58, 125, 57, 44, 69, 66, 47, 64, 144]}

 51%|█████     | 509998/1000000 [6:45:53<4:52:34, 27.91it/s]global step 510000, trans_decision ep_re 291.70100510053214

{"global_step": 510000, "eval_re": [102.9548389393422, 272.0107873541025, 
136.33786997746293, 485.01573190234996, 443.1566551014487, 415.83754631790566, 
286.68289231153324, 114.18332690719011, 174.71488409245168, 486.1155181015348], 
"eval_len": [20, 53, 26, 87, 78, 78, 56, 22, 33, 88]}

 52%|█████▏    | 519998/1000000 [6:53:55<4:51:37, 27.43it/s]global step 520000, trans_decision ep_re 296.2874975919879

{"global_step": 520000, "eval_re": [278.5518552282283, 292.3269782814, 
229.27955836233585, 258.6172103838763, 163.19723195681726, 776.9797549580497, 
119.54135829597286, 326.1959535746508, 392.9367028699207, 125.24837200862693], 
"eval_len": [54, 56, 43, 50, 31, 149, 23, 63, 73, 24]}

 53%|█████▎    | 529997/1000000 [7:01:56<4:43:28, 27.63it/s]global step 530000, trans_decision ep_re 269.44776376947743

{"global_step": 530000, "eval_re": [207.0601500767731, 272.5052071969437, 
484.36243119711975, 162.94724915488956, 342.29236479804797, 146.96458039685754, 
108.63991490881345, 459.9269451331098, 269.9336763320077, 239.84511850021173], 
"eval_len": [41, 51, 92, 31, 60, 28, 21, 83, 52, 47]}

 54%|█████▍    | 539999/1000000 [7:09:56<4:38:27, 27.53it/s]global step 540000, trans_decision ep_re 253.15791567847185

{"global_step": 540000, "eval_re": [140.925048126516, 362.28036755597316, 
242.0102364455316, 439.88184137332746, 249.87794841883309, 239.84569882531116, 
346.6746574187264, 163.35179633061318, 244.17206118204854, 102.55950110783827], 
"eval_len": [27, 65, 47, 74, 49, 47, 64, 31, 45, 20]}

 55%|█████▍    | 549999/1000000 [7:18:10<4:39:47, 26.81it/s]global step 550000, trans_decision ep_re 357.849985004029

{"global_step": 550000, "eval_re": [243.1316563238058, 397.4828968713347, 
309.1412820244863, 209.46181025721094, 184.87466242232807, 593.2492507403275, 
401.0835023054893, 267.89243440322133, 252.57680742932365, 719.6055472627627], 
"eval_len": [48, 73, 59, 39, 35, 120, 73, 50, 49, 126]}

 56%|█████▌    | 559997/1000000 [7:26:10<4:22:28, 27.94it/s]global step 560000, trans_decision ep_re 264.0171006702134

{"global_step": 560000, "eval_re": [183.25752077651234, 146.40533397006112, 
366.33980596934816, 241.47120792478344, 211.0697694241499, 222.39793016671982, 
125.12521844297869, 631.8369642149448, 250.6290311734642, 261.6382246391715], 
"eval_len": [36, 28, 69, 48, 40, 44, 24, 124, 49, 50]}

 57%|█████▋    | 569998/1000000 [7:34:10<4:20:45, 27.48it/s]global step 570000, trans_decision ep_re 394.73714220691704

{"global_step": 570000, "eval_re": [767.3627978341029, 229.10685874594822, 
253.97751481505205, 274.1019909221562, 348.45886832788415, 939.7716551780009, 
426.1574809761039, 163.5869443361952, 307.43231387295964, 237.41499706076735], 
"eval_len": [148, 47, 53, 53, 66, 170, 88, 31, 58, 46]}

 58%|█████▊    | 579999/1000000 [7:42:10<4:09:59, 28.00it/s]global step 580000, trans_decision ep_re 464.8742181846634

{"global_step": 580000, "eval_re": [235.08161860040278, 284.09702901394064, 
399.92887246016426, 322.0838808074751, 387.6741095468695, 119.22633047965535, 
278.9088579180708, 2174.4031463852248, 254.3128000120897, 193.0255366227414], 
"eval_len": [47, 54, 69, 61, 68, 23, 52, 416, 49, 37]}

 59%|█████▉    | 589997/1000000 [7:50:01<4:09:04, 27.43it/s]global step 590000, trans_decision ep_re 305.1707857668199

{"global_step": 590000, "eval_re": [208.84734703125233, 502.2739784765085, 
119.64213246394908, 493.63727998347815, 130.71891755655656, 252.11565349124146, 
291.2331244451367, 267.1328592371538, 415.9662260757564, 370.14033890716604], 
"eval_len": [43, 87, 23, 96, 25, 48, 56, 51, 75, 66]}

 60%|█████▉    | 599999/1000000 [7:58:04<4:00:52, 27.68it/s]global step 600000, trans_decision ep_re 259.65441681317975

{"global_step": 600000, "eval_re": [233.54241940823266, 338.48922248665576, 
243.57269553394772, 119.90866571378265, 223.1930824883538, 119.83099945909652, 
429.8846164222731, 139.81758961303348, 478.5881860716059, 269.716690934816], 
"eval_len": [45, 62, 49, 23, 47, 23, 81, 27, 88, 52]}

 61%|██████    | 609997/1000000 [8:06:02<3:58:57, 27.20it/s]global step 610000, trans_decision ep_re 383.9221459412932

{"global_step": 610000, "eval_re": [252.60891379445073, 556.316553299473, 
335.24772615618144, 242.68998882336513, 526.2967819984797, 378.9011183452687, 
570.0576334330375, 604.4294071849953, 96.63096216710714, 276.04237421057303], 
"eval_len": [49, 101, 58, 48, 106, 72, 98, 115, 19, 54]}

 62%|██████▏   | 619998/1000000 [8:14:03<3:47:10, 27.88it/s]global step 620000, trans_decision ep_re 423.4499996871649

{"global_step": 620000, "eval_re": [613.8258097870169, 518.608285606848, 
130.9072802825347, 689.0461248214618, 330.1782110560231, 386.2579786855728, 
442.48754469488955, 321.02748691445714, 331.9313330232758, 470.22994199956906], 
"eval_len": [110, 98, 25, 129, 61, 68, 81, 59, 63, 87]}

 63%|██████▎   | 629999/1000000 [8:22:04<3:43:44, 27.56it/s]global step 630000, trans_decision ep_re 391.90828926509306

{"global_step": 630000, "eval_re": [146.5516663484185, 108.19057823970599, 
379.3411288962402, 1039.4027447996802, 478.08063514780514, 917.3889135080414, 
107.59581484696189, 285.0878180864775, 289.43539295850934, 168.00819981909052], 
"eval_len": [28, 21, 71, 184, 84, 169, 21, 52, 57, 32]}

 64%|██████▍   | 639998/1000000 [8:30:05<3:32:54, 28.18it/s]global step 640000, trans_decision ep_re 255.9456534931931

{"global_step": 640000, "eval_re": [103.50195371464578, 114.24085329419933, 
311.85730909753516, 250.48312571120044, 421.6222352552871, 243.04815216662462, 
263.93246759627266, 202.45186438918034, 324.42866164853393, 323.8899120584514], 
"eval_len": [20, 22, 58, 49, 75, 47, 51, 38, 60, 59]}

 65%|██████▍   | 649997/1000000 [8:38:03<3:29:42, 27.82it/s]global step 650000, trans_decision ep_re 291.52892042276557

{"global_step": 650000, "eval_re": [503.79645349836727, 249.2263294016342, 
252.5982588071437, 244.0851272309962, 251.08088074834467, 177.4817658563093, 
238.5773325706922, 424.6722040161764, 437.3874957226556, 136.38335637533586], 
"eval_len": [96, 49, 49, 46, 49, 34, 46, 78, 77, 26]}

 66%|██████▌   | 659999/1000000 [8:46:10<3:26:10, 27.48it/s]global step 660000, trans_decision ep_re 340.53766017305594

{"global_step": 660000, "eval_re": [677.9478086658257, 267.07784533660276, 
649.2072017957291, 250.6060031172086, 130.40772206847444, 262.1295856270882, 
268.7744200760139, 223.8223196146354, 359.45915893102074, 315.94453649796], 
"eval_len": [121, 52, 129, 48, 25, 53, 51, 43, 66, 59]}

 67%|██████▋   | 669999/1000000 [8:53:54<3:23:41, 27.00it/s]global step 670000, trans_decision ep_re 338.4739417528865

{"global_step": 670000, "eval_re": [475.04662655728526, 779.449227198086, 
333.993766415416, 304.75935020635876, 266.4654765899176, 169.04164539066582, 
163.0200695940221, 296.9037960923316, 356.017233503721, 240.04222598106116], 
"eval_len": [83, 155, 64, 57, 52, 32, 31, 56, 66, 47]}

 68%|██████▊   | 679997/1000000 [9:01:50<3:11:48, 27.81it/s]global step 680000, trans_decision ep_re 230.83343147472198

{"global_step": 680000, "eval_re": [280.71067961602597, 119.16186531399073, 
249.80230635395222, 275.2413824799542, 114.03791297178792, 109.05991452033004, 
414.3405295509084, 162.7258646826101, 295.9184910285329, 287.3353682291274], 
"eval_len": [53, 23, 49, 50, 22, 21, 78, 31, 56, 56]}

 69%|██████▉   | 689997/1000000 [9:09:44<3:02:35, 28.30it/s]global step 690000, trans_decision ep_re 223.9811001547189

{"global_step": 690000, "eval_re": [288.4953579306097, 543.6557875557921, 
185.5619458323468, 134.40362733261932, 146.79761664824778, 253.69946647280258, 
103.0325677798461, 148.27741935827146, 261.81511128649396, 174.07210135015893], 
"eval_len": [55, 105, 35, 26, 28, 47, 20, 28, 51, 33]}

 70%|██████▉   | 699997/1000000 [9:17:50<2:59:03, 27.92it/s]global step 700000, trans_decision ep_re 229.1536920606846

{"global_step": 700000, "eval_re": [261.85607237839315, 435.21600860045294, 
338.4720979166657, 235.2834185163804, 230.63834693526533, 124.99965403100965, 
114.07846576296674, 114.43483664070729, 192.56507989537238, 243.99293992963212],
"eval_len": [49, 75, 65, 46, 44, 24, 22, 22, 36, 47]}

 71%|███████   | 709997/1000000 [9:25:50<2:57:45, 27.19it/s]global step 710000, trans_decision ep_re 313.19906590170774

{"global_step": 710000, "eval_re": [236.83349079358902, 554.4132802995651, 
259.1627406510375, 103.13307701645331, 823.7460558921629, 238.90064504855619, 
108.55314883074854, 263.54879877854205, 255.70441110191388, 287.9950106045089], 
"eval_len": [46, 98, 51, 20, 164, 46, 21, 50, 49, 56]}

 72%|███████▏  | 719998/1000000 [9:33:35<2:49:36, 27.51it/s]global step 720000, trans_decision ep_re 358.9281071881004

{"global_step": 720000, "eval_re": [144.95431022542738, 618.842089225648, 
246.0679647569723, 531.5498094777051, 394.13981817708947, 421.81461762440466, 
317.1113887863113, 295.8327419882678, 348.89556065865776, 270.07277096051973], 
"eval_len": [28, 111, 48, 98, 69, 79, 59, 57, 67, 52]}

 73%|███████▎  | 729999/1000000 [9:41:33<2:42:01, 27.77it/s]global step 730000, trans_decision ep_re 296.4700951181137

{"global_step": 730000, "eval_re": [294.50718128109435, 231.1758642001771, 
245.72797388780054, 234.07445132280873, 103.13115989846705, 787.4093006226155, 
252.58072391732352, 140.39711551271333, 296.9889113931824, 378.70826914495444], 
"eval_len": [54, 46, 47, 46, 20, 144, 49, 27, 56, 70]}

 74%|███████▍  | 739997/1000000 [9:49:30<2:35:50, 27.81it/s]global step 740000, trans_decision ep_re 415.171572366641

{"global_step": 740000, "eval_re": [357.35755454299306, 393.1929854616077, 
260.82256015564644, 267.98088656466075, 377.4271193447015, 665.108285299294, 
634.7542541944389, 666.3253881518426, 276.84092664227916, 251.9057633089456], 
"eval_len": [66, 72, 49, 51, 70, 127, 132, 128, 52, 47]}

 75%|███████▍  | 749999/1000000 [9:57:40<2:28:35, 28.04it/s]global step 750000, trans_decision ep_re 442.1402700359114

{"global_step": 750000, "eval_re": [206.50199394495658, 811.8187682238628, 
292.6203968419055, 467.93212111826404, 762.6076410336732, 250.49345529780226, 
699.7401709464727, 298.0462677981109, 319.1509953256792, 312.4908898283868], 
"eval_len": [39, 154, 53, 86, 141, 49, 134, 56, 59, 59]}

 76%|███████▌  | 759997/1000000 [10:05:40<2:30:30, 26.58it/s]global step 760000, trans_decision ep_re 405.30907434909335

{"global_step": 760000, "eval_re": [191.00068345486375, 530.3191462716662, 
657.9390566931324, 345.09202648341534, 317.4389322719764, 250.87510268179196, 
471.0285137919056, 295.68223869752774, 131.07625612349042, 862.6387870211637], 
"eval_len": [36, 101, 119, 65, 58, 48, 86, 56, 25, 159]}

 77%|███████▋  | 769998/1000000 [10:13:25<2:14:40, 28.46it/s]global step 770000, trans_decision ep_re 418.59984629902

{"global_step": 770000, "eval_re": [287.60747071804184, 261.0248523901287, 
662.0757619102523, 131.54142136462355, 628.7688907665546, 830.5126473277749, 
251.07071867014477, 604.257412724241, 261.6344225171665, 267.50486460127235], 
"eval_len": [54, 49, 126, 25, 115, 155, 48, 114, 51, 49]}

 78%|███████▊  | 779998/1000000 [10:21:23<2:10:59, 27.99it/s]global step 780000, trans_decision ep_re 280.8955464819372

{"global_step": 780000, "eval_re": [330.93632019905226, 156.59911233936447, 
184.30847372303094, 681.4767344973061, 130.2106691467553, 346.32873191842765, 
158.4618853616623, 147.32038877050616, 331.5544207559822, 341.75872810728384], 
"eval_len": [62, 30, 35, 134, 25, 62, 30, 28, 61, 62]}

 79%|███████▉  | 789998/1000000 [10:29:30<2:04:05, 28.20it/s]global step 790000, trans_decision ep_re 436.0921017413745

{"global_step": 790000, "eval_re": [246.69734646240244, 240.08192455169765, 
478.55727135100517, 862.2547567644477, 771.5497621632464, 616.5340163467221, 
340.4185121558652, 393.05995571764265, 303.7471487992969, 108.02032310141965], 
"eval_len": [48, 47, 88, 151, 144, 127, 64, 71, 58, 21]}

 80%|███████▉  | 799998/1000000 [10:37:15<1:59:08, 27.98it/s]global step 800000, trans_decision ep_re 242.56613966272636

{"global_step": 800000, "eval_re": [119.26616075133582, 250.763084993529, 
272.66243019782974, 268.56857297756676, 259.36272499192944, 334.37953752584264, 
261.00384943136385, 252.65038644605053, 141.44130687027175, 265.5633424415443], 
"eval_len": [23, 48, 52, 52, 49, 60, 51, 47, 27, 52]}

 81%|████████  | 809998/1000000 [10:45:11<1:51:14, 28.47it/s]global step 810000, trans_decision ep_re 334.7803664651272

{"global_step": 810000, "eval_re": [267.5239182991527, 655.5090942037774, 
140.79976075829398, 539.9709829804684, 298.3137733583793, 319.5666128809471, 
379.72986273917, 118.16313272861338, 173.52905262756414, 454.69747407490564], 
"eval_len": [50, 119, 27, 94, 55, 61, 73, 23, 33, 82]}

 82%|████████▏ | 819997/1000000 [10:53:11<1:49:27, 27.41it/s]global step 820000, trans_decision ep_re 461.79205567190354

{"global_step": 820000, "eval_re": [269.7410850275776, 279.11835198367476, 
349.67611979434895, 288.6293382526196, 362.29823421286386, 442.1660261611549, 
670.683688123545, 472.27861437135635, 793.763604130149, 689.5654946617456], 
"eval_len": [52, 51, 64, 54, 66, 82, 132, 82, 148, 134]}

 83%|████████▎ | 829999/1000000 [11:01:12<1:45:32, 26.84it/s]global step 830000, trans_decision ep_re 372.61511739711347

{"global_step": 830000, "eval_re": [109.10364221280295, 426.06877474905156, 
585.0734883952044, 494.0121362648547, 537.0865916215466, 283.7764356500387, 
239.15719951859202, 605.8796944239332, 97.42180707852341, 348.5714040565868], 
"eval_len": [21, 76, 98, 86, 97, 54, 47, 111, 19, 65]}

 84%|████████▍ | 839997/1000000 [11:09:14<1:35:13, 28.01it/s]global step 840000, trans_decision ep_re 544.0887745653844

{"global_step": 840000, "eval_re": [257.763845855082, 928.682227504731, 
1140.5675066405827, 1213.629498347103, 126.03902220984304, 302.65063079127617, 
113.91282584280478, 897.746197710509, 119.38991312326951, 340.50607762864246], 
"eval_len": [49, 185, 203, 222, 24, 56, 22, 162, 23, 61]}

 85%|████████▍ | 849999/1000000 [11:17:14<1:32:47, 26.94it/s]global step 850000, trans_decision ep_re 451.0141246764588

{"global_step": 850000, "eval_re": [247.57293837749637, 740.4600231267806, 
274.8856763965404, 1174.8881666660254, 294.6252621812709, 302.5794949133094, 
597.1227564223998, 118.30914535420854, 333.1693700575765, 426.52841326897965], 
"eval_len": [48, 134, 52, 216, 58, 56, 104, 23, 61, 84]}

 86%|████████▌ | 859998/1000000 [11:25:14<1:22:57, 28.13it/s]global step 860000, trans_decision ep_re 371.1109629407696

{"global_step": 860000, "eval_re": [265.0614151219648, 376.41419194949793, 
382.57426971071874, 438.86399041842526, 376.2606857286882, 668.4298723789215, 
512.2931373470667, 420.91434353600334, 114.5294842629683, 155.7682389534412], 
"eval_len": [50, 72, 70, 83, 71, 134, 98, 85, 22, 30]}

 87%|████████▋ | 869997/1000000 [11:33:12<1:17:52, 27.82it/s]global step 870000, trans_decision ep_re 461.03270643568413

{"global_step": 870000, "eval_re": [1233.604018201723, 301.63113785031203, 
460.2749432027517, 371.68441953620993, 97.72075163100432, 350.9358139208483, 
279.7480446400735, 400.11121877020605, 343.2840290743664, 771.3326875293454], 
"eval_len": [236, 57, 85, 69, 19, 66, 56, 72, 65, 141]}

 88%|████████▊ | 879998/1000000 [11:41:13<1:12:24, 27.62it/s]global step 880000, trans_decision ep_re 248.6135428421979

{"global_step": 880000, "eval_re": [304.76903128410544, 234.36023512855127, 
325.3679010788277, 220.83513700891473, 119.51396311756358, 108.34610812586, 
169.06107363302286, 234.22774719210926, 638.9881670256996, 130.66606482732436], 
"eval_len": [58, 46, 61, 44, 23, 21, 32, 45, 117, 25]}

 89%|████████▉ | 889998/1000000 [11:49:14<1:05:17, 28.08it/s]global step 890000, trans_decision ep_re 352.55843767935596

{"global_step": 890000, "eval_re": [281.03622858910273, 183.7518758006194, 
819.910053472604, 147.89724177966886, 258.4259028727077, 429.46706506310284, 
153.28180540780897, 460.9222014261868, 120.28673707064583, 670.6052653111129], 
"eval_len": [54, 35, 150, 28, 49, 83, 29, 80, 23, 125]}

 90%|████████▉ | 899998/1000000 [11:57:14<1:00:27, 27.57it/s]global step 900000, trans_decision ep_re 410.12620163429165

{"global_step": 900000, "eval_re": [432.3583648588882, 336.2852961552854, 
344.9810257447717, 216.07232120743677, 235.78701151513073, 244.14920384923767, 
275.1700164008433, 1278.6265509290777, 371.39638561948874, 366.4358400627567], 
"eval_len": [76, 61, 64, 42, 46, 47, 52, 255, 67, 67]}

 91%|█████████ | 909999/1000000 [12:05:14<53:53, 27.83it/s]global step 910000, trans_decision ep_re 484.227985008814

{"global_step": 910000, "eval_re": [1296.7033821194275, 163.63913469771265, 
408.20623318002043, 417.4694581407776, 320.9491918994224, 343.45586317100407, 
888.4294876524887, 358.5100281032135, 340.03805989443987, 304.87901122963297], 
"eval_len": [258, 31, 75, 74, 59, 62, 173, 64, 66, 55]}

 92%|█████████▏| 919999/1000000 [12:13:13<49:52, 26.73it/s]global step 920000, trans_decision ep_re 292.3226382459251

{"global_step": 920000, "eval_re": [409.4357834884818, 234.47389384002065, 
268.66061057207537, 147.15575122962116, 283.2162038012602, 307.5375080337597, 
288.0331657969351, 152.95390178679074, 515.304643712221, 316.45492019808546], 
"eval_len": [77, 47, 50, 28, 53, 59, 54, 29, 104, 62]}

 93%|█████████▎| 929999/1000000 [12:21:12<41:24, 28.17it/s]global step 930000, trans_decision ep_re 417.48424803646867

{"global_step": 930000, "eval_re": [482.0024027655047, 628.1595217211753, 
282.89083195970954, 120.15149624564495, 153.60780108537008, 480.7243450141103, 
324.89967785357885, 1018.5434612944506, 488.67594080194624, 195.1870016231961], 
"eval_len": [90, 113, 55, 23, 29, 95, 65, 201, 91, 39]}

 94%|█████████▍| 939999/1000000 [12:29:20<35:52, 27.88it/s]global step 940000, trans_decision ep_re 338.59443045543776

{"global_step": 940000, "eval_re": [300.90850722398994, 258.6327385826777, 
333.54283641552394, 248.14289506701584, 282.9178011446983, 297.4863701651816, 
647.5857297210698, 275.8345413685062, 476.70527898528843, 264.1876058804259], 
"eval_len": [55, 51, 63, 48, 54, 56, 121, 52, 93, 51]}

 95%|█████████▍| 949997/1000000 [12:37:05<30:15, 27.55it/s]global step 950000, trans_decision ep_re 306.7089138009984

{"global_step": 950000, "eval_re": [307.2729193583856, 474.35838928534673, 
280.9041103291711, 357.0297038265678, 430.50859435563916, 145.8480484190703, 
279.6966987111019, 293.62389376274444, 234.30163186990947, 263.5451480920469], 
"eval_len": [57, 91, 51, 67, 78, 28, 55, 55, 46, 51]}

 96%|█████████▌| 959998/1000000 [12:45:03<23:54, 27.88it/s]global step 960000, trans_decision ep_re 292.30618835054986

{"global_step": 960000, "eval_re": [242.2668983600816, 378.38292254462425, 
278.7814513615605, 120.54141920183768, 309.1436171733633, 259.1265918473863, 
125.18783019148583, 595.147502453955, 483.3738280574666, 131.10982231373796], 
"eval_len": [47, 74, 53, 23, 59, 49, 24, 107, 86, 25]}

 97%|█████████▋| 969998/1000000 [12:53:01<18:27, 27.10it/s]global step 970000, trans_decision ep_re 534.6202948981743

{"global_step": 970000, "eval_re": [620.0703781546327, 1078.3073649917105, 
611.0829949336651, 249.4799786651524, 343.7236610729075, 114.36535908878972, 
1364.4967110219175, 374.6220610469499, 195.55268582864562, 394.50175417737046], 
"eval_len": [113, 217, 109, 48, 65, 22, 236, 73, 39, 73]}

 98%|█████████▊| 979999/1000000 [13:01:01<12:16, 27.14it/s]global step 980000, trans_decision ep_re 397.957813237095

{"global_step": 980000, "eval_re": [296.4463072185559, 433.9581555425079, 
125.39857223407375, 230.30077158601256, 301.0909935457326, 97.33756217206826, 
199.32601101903228, 426.7631671712959, 1727.5318924353162, 141.42469944635502], 
"eval_len": [57, 78, 24, 46, 57, 19, 37, 78, 314, 27]}

 99%|█████████▉| 989998/1000000 [13:09:01<05:50, 28.55it/s]global step 990000, trans_decision ep_re 352.61181257893173

{"global_step": 990000, "eval_re": [331.67348756335844, 189.76136465654253, 
341.773359820344, 305.99142373182127, 320.6061556376385, 332.49701143323057, 
191.7751172200035, 411.77289240637526, 286.3143629705834, 813.9529503494201], 
"eval_len": [64, 36, 62, 57, 59, 59, 36, 75, 54, 154]}

100%|█████████▉| 999997/1000000 [13:17:00<00:00, 27.29it/s]global step 1000000, trans_decision ep_re 229.6530614778619

{"global_step": 1000000, "eval_re": [332.4695816920515, 96.62400066705328, 
237.1666478134169, 234.09489653453187, 240.3735350379724, 394.11183667608293, 
264.00452597180157, 158.4032164459933, 230.22590550062063, 109.05646843909456], 
"eval_len": [62, 19, 46, 47, 47, 76, 51, 30, 46, 21]}

100%|██████████| 1000000/1000000 [13:17:13<00:00, 20.91it/s]
