
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)'
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9999/1000000 [03:30<8:52:28, 30.99it/s]global step 10000, trans_decision ep_re 324.1929454365894

{"global_step": 10000, "eval_re": [343.73302330195537, 348.54731249923867, 
338.88824497590974, 337.88011646470403, 308.4367809728885, 314.1589371268446, 
304.1284865875555, 326.3253078760148, 319.3714812489531, 300.4597633118303], 
"eval_len": [63, 64, 62, 62, 57, 58, 56, 60, 59, 56]}

  2%|▏         | 19999/1000000 [09:53<8:46:28, 31.02it/s]global step 20000, trans_decision ep_re 334.3422606755072

{"global_step": 20000, "eval_re": [368.5166026694032, 285.411146345186, 
295.90610912727806, 312.91669983395593, 353.4168052949327, 310.84789039729054, 
324.04794153877896, 367.21754187694506, 394.7503096647777, 330.39156000652326], 
"eval_len": [68, 53, 55, 57, 65, 58, 60, 66, 71, 60]}

  3%|▎         | 29999/1000000 [16:40<8:39:00, 31.15it/s]global step 30000, trans_decision ep_re 350.73009031629147

{"global_step": 30000, "eval_re": [325.14723117837264, 392.8928937010829, 
366.0912292460563, 373.12930798132646, 348.23687170472243, 353.3327981275786, 
323.88249046305526, 380.7667269021681, 329.6250368579243, 314.19631700062763], 
"eval_len": [61, 72, 67, 68, 64, 64, 60, 70, 62, 59]}

  4%|▍         | 39997/1000000 [23:16<9:07:33, 29.22it/s]global step 40000, trans_decision ep_re 333.6958595052359

{"global_step": 40000, "eval_re": [404.8140699025075, 392.6592513256727, 
303.5321131949189, 254.43942453479676, 324.61380439095115, 327.2522353628396, 
363.0257474658028, 386.0270555206536, 326.73376818281804, 253.86112517139733], 
"eval_len": [74, 71, 57, 47, 60, 60, 66, 70, 60, 47]}

  5%|▍         | 49999/1000000 [29:35<8:25:22, 31.33it/s]global step 50000, trans_decision ep_re 385.10519300943207

{"global_step": 50000, "eval_re": [369.4732761876362, 364.4850861418989, 
456.5317752036883, 383.42193876777026, 345.83969020174055, 422.7854765044271, 
341.9314325448278, 371.1773495344309, 454.29390542431616, 341.11199958358424], 
"eval_len": [68, 66, 84, 69, 63, 77, 62, 67, 84, 62]}

  6%|▌         | 59999/1000000 [36:11<8:32:42, 30.56it/s]global step 60000, trans_decision ep_re 336.46631540520355

{"global_step": 60000, "eval_re": [427.6440809761782, 281.2226536498803, 
318.7030688845852, 339.874718386857, 343.1518981398094, 366.62796518601067, 
278.84867281593966, 338.8516149283135, 343.5833859546178, 326.1550951298436], 
"eval_len": [78, 54, 60, 62, 62, 68, 53, 62, 63, 61]}

  7%|▋         | 69996/1000000 [42:16<6:41:40, 38.59it/s]global step 70000, trans_decision ep_re 323.97838731049933

{"global_step": 70000, "eval_re": [397.34254350109245, 350.1859255366696, 
350.72735161385486, 156.83080679277344, 400.44107073516767, 319.8524846628939, 
343.3341521941388, 347.2331631842695, 386.055720025804, 187.78065485832883], 
"eval_len": [73, 65, 66, 30, 73, 61, 63, 66, 70, 36]}

  8%|▊         | 79999/1000000 [48:52<6:54:25, 37.00it/s]global step 80000, trans_decision ep_re 356.0941462872057

{"global_step": 80000, "eval_re": [354.13104381528615, 379.0005649812974, 
305.6800628716138, 343.25716815680653, 365.56908976462347, 353.7474194490266, 
343.8379891591712, 358.20857170364036, 365.7493850773331, 391.7601678932582], 
"eval_len": [64, 68, 56, 64, 66, 64, 63, 64, 66, 72]}

  9%|▉         | 89999/1000000 [55:31<8:07:10, 31.13it/s]global step 90000, trans_decision ep_re 371.64951156351947

{"global_step": 90000, "eval_re": [376.5580403586077, 337.81674954098946, 
347.0274687729518, 361.83154096881725, 409.32645830740415, 370.5209118646857, 
430.83650286533975, 328.95927134082217, 358.14957434438526, 395.46859727119113],
"eval_len": [68, 62, 64, 68, 74, 67, 78, 62, 66, 72]}

 10%|▉         | 99997/1000000 [1:01:47<6:29:31, 38.51it/s]global step 100000, trans_decision ep_re 359.99650966494085

{"global_step": 100000, "eval_re": [393.5490809389473, 411.81301264615166, 
344.91119725571434, 373.3425973474945, 184.6021653151539, 327.5634486474503, 
400.50500284467694, 475.55378607079854, 337.01262226470817, 351.11218331831236],
"eval_len": [71, 74, 64, 68, 35, 61, 72, 86, 62, 64]}

 11%|█         | 109996/1000000 [1:08:31<8:00:07, 30.90it/s]global step 110000, trans_decision ep_re 347.24268742886244

{"global_step": 110000, "eval_re": [301.22522528818104, 362.9001218798446, 
428.343233354913, 340.5774270681474, 316.6647979379546, 405.1993035811188, 
375.3630219341171, 314.35284500474376, 324.41305393261905, 303.3878443069853], 
"eval_len": [56, 66, 78, 62, 58, 73, 69, 58, 60, 56]}

 12%|█▏        | 119998/1000000 [1:14:46<6:20:28, 38.55it/s]global step 120000, trans_decision ep_re 415.7228415640117

{"global_step": 120000, "eval_re": [505.65269133630875, 334.19992575306657, 
375.2988733112539, 356.1556674670893, 385.9410568636324, 359.38908994350084, 
546.6789801215926, 369.5865460350906, 488.98335336177627, 435.34223144680493], 
"eval_len": [92, 62, 69, 65, 71, 65, 100, 69, 87, 77]}

 13%|█▎        | 129999/1000000 [1:21:31<7:48:33, 30.95it/s]global step 130000, trans_decision ep_re 381.7937373087472

{"global_step": 130000, "eval_re": [413.4832290036202, 368.84727383160225, 
318.368127043618, 327.50664649880207, 436.2066278131068, 444.0333924365484, 
346.53458533939954, 378.0582471874958, 439.07534729581135, 345.82389663746727], 
"eval_len": [77, 67, 58, 61, 79, 81, 63, 69, 79, 63]}

 14%|█▍        | 139998/1000000 [1:27:44<6:12:22, 38.49it/s]global step 140000, trans_decision ep_re 336.93780591435177

{"global_step": 140000, "eval_re": [313.3619413656671, 333.04647064696974, 
157.1238046225062, 364.82440110375137, 389.11621391951144, 375.86083684545946, 
380.44659818204525, 363.86029067873204, 375.550197768464, 316.18730401041154], 
"eval_len": [58, 61, 30, 68, 71, 68, 68, 68, 67, 59]}

 15%|█▍        | 149999/1000000 [1:34:21<7:29:38, 31.51it/s]global step 150000, trans_decision ep_re 382.6377661170454

{"global_step": 150000, "eval_re": [392.2545892561934, 398.15034192469534, 
285.1208630751022, 406.8044162623305, 398.0482248380793, 376.2291302959536, 
356.49642058077893, 454.52916516349865, 377.4445024903427, 381.3000072834793], 
"eval_len": [72, 72, 54, 74, 72, 70, 64, 83, 69, 70]}

 16%|█▌        | 159999/1000000 [1:40:31<10:48:07, 21.60it/s]global step 160000, trans_decision ep_re 379.14729293691187

{"global_step": 160000, "eval_re": [403.834356381342, 359.66739898414386, 
381.73544858139735, 344.46793588070335, 370.86517156912805, 410.029165151645, 
441.32766283682673, 357.6790768586537, 386.05231853159836, 335.81439459367976], 
"eval_len": [74, 66, 70, 64, 67, 75, 80, 65, 70, 61]}

 17%|█▋        | 169999/1000000 [1:47:11<7:41:58, 29.94it/s]global step 170000, trans_decision ep_re 383.3702449310338

{"global_step": 170000, "eval_re": [357.2246514887239, 385.66055513220317, 
399.3871143711707, 331.4749200262159, 476.1380674295915, 355.0929150770894, 
393.96295627845655, 420.51936810994863, 430.65861133573713, 283.5832900612015], 
"eval_len": [64, 70, 72, 59, 88, 65, 71, 78, 79, 53]}

 18%|█▊        | 179997/1000000 [1:53:26<5:47:27, 39.33it/s]global step 180000, trans_decision ep_re 361.9406710217259

{"global_step": 180000, "eval_re": [368.24241429430253, 370.0372460197532, 
341.05741134722166, 327.6116301863609, 360.6844783475053, 337.5364535308565, 
366.60390583098587, 374.96142948770455, 404.2006292497111, 368.47111192285684], 
"eval_len": [67, 68, 62, 61, 66, 62, 67, 68, 75, 67]}

 19%|█▉        | 189999/1000000 [1:59:57<5:42:37, 39.40it/s]global step 190000, trans_decision ep_re 374.09794791465276

{"global_step": 190000, "eval_re": [458.5364388042082, 295.13140675611817, 
329.6903690212454, 342.1093120542584, 370.6587885854945, 436.5503139918653, 
389.6124336505287, 378.91083807172174, 443.86954459503016, 295.9100336160567], 
"eval_len": [85, 54, 60, 62, 67, 81, 70, 68, 82, 56]}

 20%|█▉        | 199997/1000000 [2:06:31<6:17:36, 35.31it/s]global step 200000, trans_decision ep_re 369.3345184452669

{"global_step": 200000, "eval_re": [331.5236623296135, 357.8063050556312, 
392.5930634735881, 409.30474926493105, 369.09473488661894, 351.50515497627504, 
339.2743787910539, 370.1764520069906, 372.2848831219772, 399.7818005459893], 
"eval_len": [61, 65, 73, 80, 68, 64, 62, 68, 68, 73]}

 21%|██        | 209997/1000000 [2:12:37<7:01:17, 31.25it/s]global step 210000, trans_decision ep_re 390.870217122505

{"global_step": 210000, "eval_re": [540.7320021903496, 369.6432994268772, 
337.1321556327844, 358.7430360059978, 421.7287662643874, 392.0426440296087, 
367.7352057672801, 367.4593503391494, 417.79653287064036, 335.68917869797497], 
"eval_len": [98, 67, 62, 65, 77, 70, 67, 66, 76, 61]}

 22%|██▏       | 219999/1000000 [2:19:21<5:37:35, 38.51it/s]global step 220000, trans_decision ep_re 390.3559217659175

{"global_step": 220000, "eval_re": [421.9125658580082, 376.17828538515994, 
414.9100502528733, 403.5108073251068, 216.69730819693342, 495.87840684434286, 
385.4761101998608, 370.50471480997993, 455.9503238216078, 362.54064496530253], 
"eval_len": [78, 70, 74, 74, 41, 91, 71, 68, 83, 66]}

 23%|██▎       | 229997/1000000 [2:25:46<8:37:58, 24.78it/s]global step 230000, trans_decision ep_re 377.986844127981

{"global_step": 230000, "eval_re": [424.9323489967787, 329.3200656426538, 
378.45470816935045, 375.89767837287343, 340.26401462876134, 414.31860696843273, 
389.2871715178744, 397.7982368194101, 372.2816991211926, 357.3139110424824], 
"eval_len": [77, 61, 70, 68, 64, 76, 71, 73, 67, 65]}

 24%|██▍       | 239999/1000000 [2:31:55<7:10:15, 29.44it/s]global step 240000, trans_decision ep_re 368.98389658479243

{"global_step": 240000, "eval_re": [388.52327694998706, 374.11073647382904, 
340.6375456308304, 419.98986447730186, 151.70851137401226, 417.60076164922265, 
425.13807017759257, 362.39727969870586, 385.8052198205172, 423.927699595926], 
"eval_len": [71, 68, 62, 76, 29, 76, 78, 67, 69, 80]}

 25%|██▍       | 249999/1000000 [2:38:06<6:00:23, 34.68it/s]global step 250000, trans_decision ep_re 388.2875320340854

{"global_step": 250000, "eval_re": [392.8758071978092, 378.50071047688954, 
372.60399577759506, 409.1130437471697, 379.64877548915086, 418.3931968677439, 
382.2386450056029, 394.65710582155754, 372.2014735942797, 382.6425663630557], 
"eval_len": [71, 68, 69, 74, 70, 77, 69, 72, 67, 70]}

 26%|██▌       | 259999/1000000 [2:44:19<5:33:09, 37.02it/s]global step 260000, trans_decision ep_re 391.4994586893725

{"global_step": 260000, "eval_re": [437.82986342525277, 408.1067148399298, 
298.36797938943744, 459.64025722629714, 309.19958773151035, 407.4447343531533, 
380.09930894494113, 378.33189562491015, 385.1810079822132, 450.79323737607984], 
"eval_len": [81, 75, 55, 83, 56, 74, 69, 70, 70, 82]}

 27%|██▋       | 269999/1000000 [2:50:07<5:15:58, 38.51it/s]global step 270000, trans_decision ep_re 371.3569686354796

{"global_step": 270000, "eval_re": [372.3981020878521, 407.68528229589646, 
367.4110012809994, 382.338133546851, 355.9922203111596, 364.7545241386218, 
372.79642844188265, 361.6863896222195, 376.0804153967382, 352.4271892325744], 
"eval_len": [68, 74, 67, 70, 65, 67, 68, 65, 68, 64]}

 28%|██▊       | 279995/1000000 [2:56:00<5:01:12, 39.84it/s]global step 280000, trans_decision ep_re 374.5238713960456

{"global_step": 280000, "eval_re": [364.1061019977139, 374.7812913492292, 
395.3477829103404, 375.42956105771935, 353.703874285021, 369.75639953070925, 
390.97160301436116, 369.60809118178037, 367.800329772833, 383.73367886074885], 
"eval_len": [66, 69, 72, 68, 65, 67, 72, 68, 67, 70]}

 29%|██▉       | 289998/1000000 [3:01:53<4:53:12, 40.36it/s]global step 290000, trans_decision ep_re 343.87146651766

{"global_step": 290000, "eval_re": [360.544511055958, 401.0023401954592, 
383.13244157197573, 338.38979623217915, 319.35809839644514, 376.2720904842799, 
307.32182723736526, 346.3374784476107, 290.42109598962804, 315.9349855656988], 
"eval_len": [66, 73, 71, 61, 60, 69, 57, 62, 57, 59]}

 30%|██▉       | 299995/1000000 [3:07:25<4:53:24, 39.76it/s]global step 300000, trans_decision ep_re 354.6441121999562

{"global_step": 300000, "eval_re": [380.0874462966784, 357.5064156359032, 
381.7989806950174, 332.39131345995827, 330.9068482192215, 321.9026600692298, 
371.3994096991842, 370.61793193823286, 367.0372327679793, 332.79288321815756], 
"eval_len": [71, 67, 71, 62, 62, 60, 68, 69, 67, 62]}

 31%|███       | 309997/1000000 [3:13:25<5:25:07, 35.37it/s]global step 310000, trans_decision ep_re 375.30821400173215

{"global_step": 310000, "eval_re": [364.7824057469755, 320.7161662251177, 
407.8966168199241, 493.0927056693319, 323.45286208750525, 387.9061938911656, 
358.302873318027, 348.8869953496338, 382.44804839032116, 365.5972725193193], 
"eval_len": [67, 61, 75, 87, 62, 71, 67, 64, 69, 67]}

 32%|███▏      | 319997/1000000 [3:19:45<4:55:07, 38.40it/s]global step 320000, trans_decision ep_re 391.4510517929597

{"global_step": 320000, "eval_re": [352.75370918698786, 468.00262602207107, 
420.8121446746034, 412.9249437325372, 378.0679640884901, 378.55315206581895, 
374.32151363355376, 392.7241045016721, 361.1078761876976, 375.2424838361654], 
"eval_len": [64, 86, 76, 76, 69, 70, 70, 72, 66, 70]}

 33%|███▎      | 329997/1000000 [3:25:57<4:49:46, 38.54it/s]global step 330000, trans_decision ep_re 374.282679774234

{"global_step": 330000, "eval_re": [333.5668565220355, 400.84352058884247, 
331.44073562688914, 447.5174561203109, 399.52339599166174, 369.0331375047794, 
357.7649600341178, 388.4036437739859, 358.31168311528785, 356.421408464429], 
"eval_len": [62, 73, 61, 82, 73, 69, 66, 72, 65, 66]}

 34%|███▍      | 339998/1000000 [3:32:23<5:51:05, 31.33it/s]global step 340000, trans_decision ep_re 374.5085012089829

{"global_step": 340000, "eval_re": [463.5937485289959, 358.4245698713155, 
151.7793095785844, 447.81653123810804, 378.29807875540746, 384.15853955285303, 
374.9349293283988, 426.2987242714211, 374.2935619371711, 385.48701902757443], 
"eval_len": [91, 66, 29, 82, 69, 71, 68, 79, 69, 72]}

 35%|███▍      | 349997/1000000 [3:38:40<4:53:45, 36.88it/s]global step 350000, trans_decision ep_re 388.31136981808066

{"global_step": 350000, "eval_re": [404.6891922081073, 382.34773035193643, 
343.36508060066836, 387.1830956356675, 371.1376255176451, 407.2126567061441, 
456.9824537032267, 413.4583497410725, 343.14725060014337, 373.59026311619556], 
"eval_len": [74, 69, 65, 70, 69, 74, 85, 77, 64, 67]}

 36%|███▌      | 359999/1000000 [3:44:54<4:26:44, 39.99it/s]global step 360000, trans_decision ep_re 326.3951892699257

{"global_step": 360000, "eval_re": [312.25264132947416, 348.7123322470228, 
385.257625369623, 338.0495510965693, 396.48227154051807, 326.2233207394547, 
430.9056854625619, 114.3203389356459, 296.6333849915583, 315.1147409868288], 
"eval_len": [59, 66, 71, 63, 73, 62, 79, 22, 56, 62]}

 37%|███▋      | 369995/1000000 [3:50:31<4:21:47, 40.11it/s]global step 370000, trans_decision ep_re 377.10239753112467

{"global_step": 370000, "eval_re": [332.6305099355896, 359.74817786572885, 
577.7726294051853, 370.22639113842723, 356.1488443299429, 332.0007011162465, 
414.1862661165774, 353.2338971968937, 334.3797998155409, 340.6967583911144], 
"eval_len": [62, 66, 105, 68, 65, 63, 75, 65, 62, 63]}

 38%|███▊      | 379999/1000000 [3:56:09<4:15:32, 40.44it/s]global step 380000, trans_decision ep_re 352.43674105238273

{"global_step": 380000, "eval_re": [319.76546375424215, 364.0682600399607, 
375.8049913067135, 336.80879764136745, 363.45198242589584, 379.2671876094595, 
426.036950678726, 280.30130471263953, 328.82261077231664, 350.0398615825059], 
"eval_len": [59, 67, 70, 63, 66, 70, 77, 53, 63, 65]}

 39%|███▉      | 389998/1000000 [4:01:47<4:11:01, 40.50it/s]global step 390000, trans_decision ep_re 421.8853975538327

{"global_step": 390000, "eval_re": [360.3302425204124, 361.71028809170843, 
349.2117982682149, 332.33657125645493, 809.4347320345469, 359.1199101258801, 
353.5731006680698, 340.8316430548967, 545.3611299978587, 406.9445595202843], 
"eval_len": [67, 67, 64, 62, 154, 68, 65, 63, 99, 74]}

 40%|███▉      | 399995/1000000 [4:07:25<4:07:55, 40.33it/s]global step 400000, trans_decision ep_re 396.3674543643871

{"global_step": 400000, "eval_re": [335.10756100078544, 703.2913431179517, 
277.4847535598299, 355.52237868978375, 403.6644745190566, 360.84353002607844, 
264.35998772285865, 305.8377794705377, 297.09061239969583, 660.4721231372929], 
"eval_len": [63, 131, 53, 66, 74, 66, 53, 58, 58, 122]}

 41%|████      | 409997/1000000 [4:13:03<4:05:37, 40.03it/s]global step 410000, trans_decision ep_re 376.9873823568918

{"global_step": 410000, "eval_re": [315.71033107058366, 487.8081176409139, 
388.84595373465976, 234.82104719036965, 357.22255956669744, 338.8285307216012, 
352.8823457244433, 336.3005434344575, 533.5832676674935, 423.87112681769787], 
"eval_len": [60, 88, 71, 47, 65, 63, 65, 62, 96, 79]}

 42%|████▏     | 419996/1000000 [4:18:44<3:56:52, 40.81it/s]global step 420000, trans_decision ep_re 364.9541845100307

{"global_step": 420000, "eval_re": [354.30190444947, 335.7718848700971, 
366.53448743466686, 367.99498704258167, 326.12218324839245, 343.3288451672673, 
359.0270984777539, 458.51123548027704, 354.78419666178337, 383.16502226801737], 
"eval_len": [65, 63, 67, 68, 63, 64, 66, 84, 65, 70]}

 43%|████▎     | 429998/1000000 [4:24:29<3:53:53, 40.62it/s]global step 430000, trans_decision ep_re 412.8519805254458

{"global_step": 430000, "eval_re": [557.9143932137936, 337.1595875540901, 
394.4903077392234, 552.6130235112619, 315.8187902303653, 398.23562307943797, 
396.9783799328867, 449.2368851854376, 347.5084295625618, 378.56438524540033], 
"eval_len": [102, 63, 71, 101, 60, 73, 73, 85, 65, 70]}

 44%|████▍     | 439997/1000000 [4:30:10<3:52:12, 40.19it/s]global step 440000, trans_decision ep_re 446.4692420134314

{"global_step": 440000, "eval_re": [359.4428249507049, 1172.892724515465, 
403.59425178526203, 358.50172304303294, 324.21638964501795, 335.21981163541, 
342.38039650187426, 337.84189569248355, 479.86898885845756, 350.73341350660706],
"eval_len": [66, 229, 73, 64, 60, 63, 63, 62, 85, 65]}

 45%|████▍     | 449998/1000000 [4:35:52<3:45:22, 40.67it/s]global step 450000, trans_decision ep_re 309.69328038860357

{"global_step": 450000, "eval_re": [264.2518371666332, 304.8429094097484, 
310.3892058582671, 297.81406065104585, 316.68509869528367, 359.79419131165804, 
323.3431153513786, 342.81659338697614, 276.25983279894757, 300.7359592560972], 
"eval_len": [50, 57, 58, 56, 59, 67, 59, 63, 53, 56]}

 46%|████▌     | 459999/1000000 [4:41:44<3:44:05, 40.16it/s]global step 460000, trans_decision ep_re 531.4765553381841

{"global_step": 460000, "eval_re": [461.15837139727057, 1139.474362304031, 
432.30077625164176, 396.9963957415698, 887.2220552938287, 383.88621512116504, 
354.11350387419645, 409.2637242679872, 440.2651726961828, 410.084976433966], 
"eval_len": [83, 206, 80, 73, 164, 71, 65, 75, 81, 75]}

 47%|████▋     | 469998/1000000 [4:47:11<3:37:54, 40.54it/s]global step 470000, trans_decision ep_re 374.9297751288147

{"global_step": 470000, "eval_re": [488.39956734347527, 351.6926469947558, 
503.05326822546493, 338.0893251380139, 333.71189382084145, 419.34896199855785, 
298.5094812378654, 307.04479038094803, 369.89127090572975, 339.55654524249456], 
"eval_len": [88, 65, 90, 62, 61, 78, 57, 59, 70, 62]}

 48%|████▊     | 479995/1000000 [4:52:49<3:35:47, 40.16it/s]global step 480000, trans_decision ep_re 351.7191135013629

{"global_step": 480000, "eval_re": [292.4885073467966, 484.602582696181, 
326.86555443467097, 361.2756492179266, 311.773204096786, 423.83163152709386, 
325.83173420417876, 365.8924587421655, 324.54744862781064, 300.0823641200192], 
"eval_len": [56, 86, 61, 67, 58, 78, 61, 68, 60, 55]}

 49%|████▉     | 489996/1000000 [4:58:27<3:28:55, 40.69it/s]global step 490000, trans_decision ep_re 498.5407671811953

{"global_step": 490000, "eval_re": [360.37305510751384, 287.43886837994484, 
610.3177540599265, 643.2223125086139, 540.3818786753508, 509.9027146819213, 
588.0918205058294, 475.685147841297, 591.2854209833075, 378.7086990682481], 
"eval_len": [67, 54, 113, 115, 96, 91, 103, 85, 103, 68]}

 50%|████▉     | 499998/1000000 [5:04:06<3:24:56, 40.66it/s]global step 500000, trans_decision ep_re 424.4895509963905

{"global_step": 500000, "eval_re": [328.5541883321399, 602.5223990239973, 
457.5793346906723, 427.9034282820268, 284.514807348239, 343.19731452246356, 
481.5563352844232, 372.02203327209384, 291.58619464254474, 655.4594745653042], 
"eval_len": [62, 106, 82, 77, 55, 63, 85, 69, 54, 117]}

 51%|█████     | 509997/1000000 [5:09:45<3:23:19, 40.16it/s]global step 510000, trans_decision ep_re 471.9590244961032

{"global_step": 510000, "eval_re": [1460.3289390967566, 416.7464183440853, 
361.3102777110629, 371.7108959757613, 360.11001217219746, 351.4930600374946, 
365.8954380946755, 299.21772776040666, 306.07081330937314, 426.70666245921865], 
"eval_len": [268, 75, 67, 68, 66, 66, 67, 56, 57, 77]}

 52%|█████▏    | 519998/1000000 [5:15:24<3:15:26, 40.93it/s]global step 520000, trans_decision ep_re 400.0923970524002

{"global_step": 520000, "eval_re": [354.55932610775324, 307.06835268313483, 
368.42838730580934, 384.57306864553334, 370.16661302522283, 493.3018335013694, 
485.8378434098699, 413.9710320305481, 506.1121156164468, 316.9053981983144], 
"eval_len": [65, 57, 68, 72, 69, 92, 89, 75, 89, 60]}

 53%|█████▎    | 529996/1000000 [5:21:02<3:12:19, 40.73it/s]global step 530000, trans_decision ep_re 343.2834781939666

{"global_step": 530000, "eval_re": [339.966262035284, 349.351580133026, 
332.87722035720185, 371.7063455564603, 366.3321502052964, 341.25727235759376, 
322.6735160676286, 310.2345098149411, 348.46323301166734, 349.9726924005664], 
"eval_len": [64, 65, 62, 70, 68, 63, 60, 58, 64, 65]}

 54%|█████▍    | 539995/1000000 [5:26:40<3:10:07, 40.33it/s]global step 540000, trans_decision ep_re 429.14682775705296

{"global_step": 540000, "eval_re": [443.23393482536284, 512.7227027942139, 
449.9523587373103, 320.9517783160296, 346.358333431227, 497.5574144114866, 
497.9268372494258, 349.83199039007343, 324.26668473535267, 548.6662426800474], 
"eval_len": [79, 92, 81, 61, 64, 90, 89, 64, 60, 97]}

 55%|█████▍    | 549996/1000000 [5:32:18<3:04:09, 40.72it/s]global step 550000, trans_decision ep_re 429.8255181290916

{"global_step": 550000, "eval_re": [307.81365356213007, 391.2351433211375, 
402.8821374850483, 355.4175117768057, 408.5530812054918, 490.0943183021681, 
910.2564102382293, 303.1468063654117, 319.4969935297957, 409.35912550469754], 
"eval_len": [57, 71, 73, 65, 76, 91, 171, 58, 59, 76]}

 56%|█████▌    | 559996/1000000 [5:37:56<3:00:42, 40.58it/s]global step 560000, trans_decision ep_re 521.4790070728134

{"global_step": 560000, "eval_re": [499.860214959486, 463.63080188157414, 
274.57723583266346, 413.8749510847375, 1053.727548663714, 888.526840940712, 
459.46933740158585, 406.1479623291732, 367.3213238380221, 387.6538537964648], 
"eval_len": [92, 86, 53, 76, 202, 161, 82, 71, 68, 70]}

 57%|█████▋    | 569999/1000000 [5:43:34<2:58:56, 40.05it/s]global step 570000, trans_decision ep_re 413.23381592597235

{"global_step": 570000, "eval_re": [596.9794707465984, 397.16903657221735, 
402.10518650175146, 368.7015893339204, 362.8124221773619, 339.3093427629798, 
449.83110139706656, 310.17658508145905, 392.56449685165234, 512.6889278347164], 
"eval_len": [113, 74, 75, 68, 68, 62, 81, 59, 72, 96]}

 58%|█████▊    | 579997/1000000 [5:49:12<2:54:21, 40.15it/s]global step 580000, trans_decision ep_re 497.7085229544791

{"global_step": 580000, "eval_re": [336.1258599984045, 508.58629863341196, 
414.57350065155254, 581.3116432441486, 435.1337034873914, 867.3097834343398, 
414.48078335124643, 532.6923396185375, 366.14602511492535, 520.7252920108334], 
"eval_len": [63, 92, 75, 102, 79, 164, 75, 96, 67, 94]}

 59%|█████▉    | 589995/1000000 [5:54:50<2:51:00, 39.96it/s]global step 590000, trans_decision ep_re 380.6659800898832

{"global_step": 590000, "eval_re": [417.94569197872863, 482.01565512890437, 
393.75930024442084, 526.1971741276543, 367.2365765397735, 331.72161724385353, 
367.9948145820486, 373.73451922181425, 320.311167532757, 225.74328429887717], 
"eval_len": [80, 90, 72, 93, 68, 62, 66, 70, 59, 45]}

 60%|█████▉    | 599996/1000000 [6:00:28<2:44:36, 40.50it/s]global step 600000, trans_decision ep_re 581.2148743911788

{"global_step": 600000, "eval_re": [393.87478369172453, 637.548971939162, 
856.0183900854028, 467.7185338733585, 459.3651466504573, 1036.1465125151433, 
418.68024838354165, 451.9729483510532, 484.32629244434384, 606.4969159776012], 
"eval_len": [71, 114, 158, 85, 83, 181, 77, 81, 89, 109]}

 61%|██████    | 609995/1000000 [6:06:06<2:41:15, 40.31it/s]global step 610000, trans_decision ep_re 516.1447883017397

{"global_step": 610000, "eval_re": [548.7060413093753, 320.9888037689112, 
406.09306080930213, 333.9738308547286, 506.5745315206331, 517.065661776147, 
417.54704706415356, 353.8434947748995, 577.4264669649049, 1179.228944174342], 
"eval_len": [107, 59, 73, 62, 93, 103, 76, 65, 107, 250]}

 62%|██████▏   | 619997/1000000 [6:11:54<2:38:39, 39.92it/s]global step 620000, trans_decision ep_re 567.3371772891612

{"global_step": 620000, "eval_re": [357.0152871210615, 580.280088164702, 
473.4852954212991, 935.2454219750945, 500.9727132238213, 1223.7816446143463, 
628.5250475159199, 339.44681514884104, 336.3819617618793, 298.23749794464766], 
"eval_len": [65, 102, 91, 180, 93, 225, 109, 63, 63, 57]}

 63%|██████▎   | 629996/1000000 [6:17:22<2:32:02, 40.56it/s]global step 630000, trans_decision ep_re 527.074016300379

{"global_step": 630000, "eval_re": [784.3737594115553, 507.48389522948196, 
398.1157234311185, 536.7365398086894, 417.28146677469135, 326.70988005833186, 
763.5168506602979, 355.4363035249889, 342.0905370822921, 838.9952070223428], 
"eval_len": [138, 93, 74, 107, 76, 62, 141, 66, 65, 165]}

 64%|██████▍   | 639998/1000000 [6:23:00<2:28:21, 40.44it/s]global step 640000, trans_decision ep_re 581.1759726032872

{"global_step": 640000, "eval_re": [246.10079276002654, 367.304367467944, 
391.22852662474344, 707.530304101923, 1181.510419269848, 554.6844393519474, 
1208.469677426229, 464.879259454239, 371.35619839499503, 318.69574118097603], 
"eval_len": [50, 68, 72, 136, 240, 100, 242, 86, 68, 59]}

 65%|██████▍   | 649996/1000000 [6:28:39<2:24:21, 40.41it/s]global step 650000, trans_decision ep_re 528.5881246214333

{"global_step": 650000, "eval_re": [328.41368736054153, 546.1770315132652, 
410.69202182203975, 384.05724400098165, 733.0177007829958, 1088.212401855365, 
347.95956335039864, 688.3335450454312, 328.5187203418013, 430.4993301415123], 
"eval_len": [62, 99, 78, 73, 132, 203, 64, 138, 61, 76]}

 66%|██████▌   | 659997/1000000 [6:34:17<2:20:43, 40.27it/s]global step 660000, trans_decision ep_re 408.47166044345204

{"global_step": 660000, "eval_re": [357.1332969020909, 381.142191388215, 
532.1364086943472, 376.2883712818405, 407.05065609005743, 689.6119417581563, 
305.48153951258377, 467.4800562587979, 315.57483370096526, 252.8173088474661], 
"eval_len": [67, 71, 109, 70, 77, 133, 57, 88, 60, 49]}

 67%|██████▋   | 669995/1000000 [6:39:55<2:17:06, 40.11it/s]global step 670000, trans_decision ep_re 588.6437680700631

{"global_step": 670000, "eval_re": [178.92790715947595, 578.2670686710895, 
400.8916011922439, 373.6198849633751, 283.9373564414926, 839.893381812179, 
1179.8533445915384, 406.9927102191933, 952.4911431124585, 691.5632825375843], 
"eval_len": [34, 117, 72, 68, 53, 159, 228, 73, 188, 124]}

 68%|██████▊   | 679996/1000000 [6:45:44<2:10:18, 40.93it/s]global step 680000, trans_decision ep_re 486.2201384304184

{"global_step": 680000, "eval_re": [999.3807948634824, 817.1853804960298, 
410.7436525413124, 417.887305564359, 245.82307824044548, 466.41892524027827, 
501.6843946312773, 372.71408549623663, 312.49751022769004, 317.86625700307326], 
"eval_len": [193, 151, 74, 75, 46, 85, 87, 69, 60, 59]}

 69%|██████▉   | 689997/1000000 [6:51:11<2:09:07, 40.01it/s]global step 690000, trans_decision ep_re 433.16632490382653

{"global_step": 690000, "eval_re": [517.398581095686, 480.3410616802451, 
362.5754541323378, 440.9980976999928, 255.6101702113486, 490.4078323843652, 
432.9964604177211, 394.58340403358125, 631.0767794753554, 325.67540790763206], 
"eval_len": [104, 91, 70, 81, 50, 86, 80, 70, 118, 59]}

 70%|██████▉   | 699996/1000000 [6:56:49<2:02:30, 40.81it/s]global step 700000, trans_decision ep_re 453.06940006370235

{"global_step": 700000, "eval_re": [463.01837784435946, 375.53769452289055, 
379.8176251087691, 428.6349324535977, 352.695963607644, 752.4809128858038, 
364.80131284406764, 631.4889866961173, 369.29535513386946, 412.922839539904], 
"eval_len": [83, 67, 69, 79, 65, 137, 67, 111, 66, 74]}

 71%|███████   | 709999/1000000 [7:02:27<2:00:29, 40.11it/s]global step 710000, trans_decision ep_re 396.7534097449707

{"global_step": 710000, "eval_re": [168.22142413798855, 293.1101103732087, 
449.4594521406815, 358.58611930498614, 448.69492923212334, 382.45624183572437, 
414.12410479802026, 326.4373560298759, 890.4516652614468, 235.99269433565183], 
"eval_len": [32, 56, 85, 68, 88, 69, 80, 63, 165, 46]}

 72%|███████▏  | 719996/1000000 [7:08:05<1:54:47, 40.65it/s]global step 720000, trans_decision ep_re 430.402591196331

{"global_step": 720000, "eval_re": [534.0171675254733, 451.64220062883925, 
399.64002840892783, 806.0238080787709, 316.0523133867932, 489.5946804858124, 
322.37919034314984, 298.4706921472214, 491.25624566167875, 194.9495852966438], 
"eval_len": [95, 79, 75, 146, 58, 100, 60, 58, 99, 37]}

 73%|███████▎  | 729999/1000000 [7:13:43<1:52:10, 40.11it/s]global step 730000, trans_decision ep_re 333.9089100955995

{"global_step": 730000, "eval_re": [371.04408626944064, 389.8444800250747, 
270.0493054467725, 340.48809187038086, 368.5528489403408, 248.92688291683302, 
390.59912872100904, 284.8928086248791, 423.82733951274935, 250.86412862851444], 
"eval_len": [69, 71, 52, 64, 69, 48, 76, 54, 78, 49]}

 74%|███████▍  | 739995/1000000 [7:19:20<1:48:04, 40.10it/s]global step 740000, trans_decision ep_re 538.7152745333754

{"global_step": 740000, "eval_re": [347.9067186457309, 282.9456228335561, 
325.457244131177, 516.1852991311362, 740.4986708366819, 710.7277087152819, 
333.66756771715717, 683.777464806954, 437.67614446934857, 1008.3103040467298], 
"eval_len": [65, 53, 62, 96, 139, 137, 65, 119, 82, 189]}

 75%|███████▍  | 749999/1000000 [7:24:59<1:43:52, 40.11it/s]global step 750000, trans_decision ep_re 387.5647874457644

{"global_step": 750000, "eval_re": [344.26847998333415, 237.53613547363923, 
336.7771979683253, 592.2339780498456, 294.4121797475389, 565.7620178407834, 
285.4293138644177, 244.40949936257493, 235.03482337974182, 739.784248787443], 
"eval_len": [64, 46, 63, 111, 56, 109, 54, 47, 46, 140]}

 76%|███████▌  | 759995/1000000 [7:30:36<1:39:38, 40.15it/s]global step 760000, trans_decision ep_re 456.82988072666046

{"global_step": 760000, "eval_re": [345.71666940841595, 450.49099858876485, 
479.0845142706111, 266.3945075353978, 384.7354017282288, 388.6930404915124, 
1067.317722954412, 185.35969031019098, 328.7588015868235, 671.7474603922475], 
"eval_len": [65, 85, 85, 51, 71, 70, 200, 35, 62, 127]}

 77%|███████▋  | 769997/1000000 [7:36:24<1:34:52, 40.40it/s]global step 770000, trans_decision ep_re 382.5470890178621

{"global_step": 770000, "eval_re": [577.1133547538617, 469.9899832298896, 
293.2910389984103, 312.6023831333694, 699.6934096818555, 360.3601491947261, 
384.56797119223614, 265.36277037043635, 320.90285761799333, 141.5869720058427], 
"eval_len": [106, 91, 54, 59, 136, 66, 74, 51, 59, 27]}

 78%|███████▊  | 779999/1000000 [7:42:04<1:32:24, 39.68it/s]global step 780000, trans_decision ep_re 503.11763322847884

{"global_step": 780000, "eval_re": [312.5316761379263, 1724.112061647573, 
321.2341584955605, 751.1639016484548, 275.9040384672458, 291.63586146133923, 
280.06975849338534, 354.4208458155032, 359.1085703985171, 360.99545971928274], 
"eval_len": [59, 353, 61, 133, 54, 57, 53, 66, 66, 67]}

 79%|███████▉  | 789998/1000000 [7:47:36<1:26:55, 40.26it/s]global step 790000, trans_decision ep_re 503.6410918325584

{"global_step": 790000, "eval_re": [1168.8122403258894, 878.7064292666583, 
356.2507741803433, 276.30670977073333, 320.69356368442993, 416.71356458157663, 
391.6933836186098, 445.3537159873538, 448.8007285333245, 333.0798083766657], 
"eval_len": [212, 171, 64, 53, 60, 76, 72, 80, 81, 61]}

 80%|███████▉  | 799995/1000000 [7:53:19<1:23:33, 39.89it/s]global step 800000, trans_decision ep_re 328.9738578692095

{"global_step": 800000, "eval_re": [278.9402726582988, 259.56986337428805, 
298.18817886484294, 394.84745217534356, 298.1583544923491, 296.2435560589496, 
487.88535284140755, 273.66433926969927, 400.8258296742061, 301.41537928271015], 
"eval_len": [56, 50, 56, 74, 56, 56, 87, 54, 74, 56]}

 81%|████████  | 809997/1000000 [7:59:01<1:19:17, 39.94it/s]global step 810000, trans_decision ep_re 412.61053887809555

{"global_step": 810000, "eval_re": [1226.1674040060843, 364.05839519958704, 
378.31690946218686, 574.7665295041336, 152.70635263921287, 287.8334778021031, 
190.33590029317762, 404.52124898340526, 297.5448375097894, 249.85433338127578], 
"eval_len": [234, 67, 69, 106, 29, 54, 36, 79, 56, 49]}

 82%|████████▏ | 819999/1000000 [8:04:54<1:15:29, 39.74it/s]global step 820000, trans_decision ep_re 321.53527847464085

{"global_step": 820000, "eval_re": [286.17192902782597, 338.992904527343, 
258.46248274038106, 313.07762783640874, 505.678800148322, 287.7868617364305, 
357.77074715657375, 256.13575744968483, 252.25064661573325, 359.02502750770503],
"eval_len": [54, 63, 51, 59, 91, 56, 69, 50, 49, 66]}

 83%|████████▎ | 829995/1000000 [8:10:24<1:11:12, 39.79it/s]global step 830000, trans_decision ep_re 387.2055988531919

{"global_step": 830000, "eval_re": [307.50697003534833, 701.4373064313841, 
345.46447657668256, 338.1740319293297, 141.7996016757394, 335.38552602019183, 
294.70526973986597, 326.162414042439, 284.4430977334229, 796.9772943475147], 
"eval_len": [58, 132, 64, 62, 27, 61, 54, 60, 56, 149]}

 84%|████████▍ | 839999/1000000 [8:16:07<1:07:12, 39.68it/s]global step 840000, trans_decision ep_re 464.60414998321414

{"global_step": 840000, "eval_re": [939.2522646931511, 396.78653858004736, 
329.5397865978725, 1109.8598561148344, 292.5774270168336, 345.85324103604336, 
322.8602336869732, 323.5481539810515, 274.7463415398657, 311.017656585468], 
"eval_len": [182, 73, 61, 207, 55, 63, 60, 60, 52, 57]}

 85%|████████▍ | 849997/1000000 [8:21:49<1:02:57, 39.71it/s]global step 850000, trans_decision ep_re 379.93191393957215

{"global_step": 850000, "eval_re": [478.3517850842035, 279.9979673040372, 
278.1128281170173, 278.1793026531488, 900.1716382819898, 224.59124388985552, 
288.1854651071186, 310.23493785772854, 313.98442671067795, 447.50954438994466], 
"eval_len": [91, 53, 53, 53, 176, 42, 55, 58, 58, 86]}

 86%|████████▌ | 859999/1000000 [8:27:31<58:34, 39.84it/s]global step 860000, trans_decision ep_re 451.34486652473635

{"global_step": 860000, "eval_re": [448.6105901295785, 515.2820633636052, 
316.92420312163415, 457.44744967648626, 372.9104441460098, 616.4402949270309, 
407.49328558869547, 418.72401486779984, 535.6035054745317, 424.012813951992], 
"eval_len": [81, 96, 59, 82, 69, 131, 77, 74, 98, 74]}

 87%|████████▋ | 869997/1000000 [8:33:15<54:06, 40.04it/s]global step 870000, trans_decision ep_re 523.0647308262505

{"global_step": 870000, "eval_re": [394.7070787809892, 371.0377311060922, 
361.06084787044506, 310.5354863633668, 388.02162017023204, 400.94278402210347, 
260.0208552602389, 1645.7470805518942, 253.82671608754313, 844.7471080496011], 
"eval_len": [72, 68, 66, 57, 73, 73, 49, 337, 48, 156]}

 88%|████████▊ | 879995/1000000 [8:38:57<50:17, 39.77it/s]global step 880000, trans_decision ep_re 484.955840140067

{"global_step": 880000, "eval_re": [362.09480301397423, 275.9385077833241, 
2015.1258668342866, 361.1590515486647, 335.42130075170786, 434.81693668558756, 
267.28827504035087, 278.6079103013741, 264.2581848414462, 254.84756459995342], 
"eval_len": [69, 51, 392, 66, 62, 86, 51, 52, 50, 48]}

 89%|████████▉ | 889999/1000000 [8:44:42<46:00, 39.84it/s]global step 890000, trans_decision ep_re 417.88926937619425

{"global_step": 890000, "eval_re": [352.0198707133407, 368.2273716692719, 
303.7829079954539, 228.16795733201482, 724.9497230457727, 335.95313338351366, 
300.1409657387136, 209.25925074677806, 1061.2060804639543, 295.18543267312936], 
"eval_len": [64, 68, 56, 44, 126, 61, 56, 39, 216, 55]}

 90%|████████▉ | 899999/1000000 [8:50:25<41:43, 39.94it/s]global step 900000, trans_decision ep_re 483.6165639761383

{"global_step": 900000, "eval_re": [480.20184128343595, 399.76772471922067, 
484.9649646222994, 319.1947681219872, 437.37200341111424, 314.6481266436978, 
1537.7673537921544, 289.8578786635348, 308.54192654985434, 263.8490519540845], 
"eval_len": [96, 73, 86, 61, 87, 58, 287, 54, 57, 49]}

 91%|█████████ | 909997/1000000 [8:56:09<37:33, 39.93it/s]global step 910000, trans_decision ep_re 435.1672999047581

{"global_step": 910000, "eval_re": [515.7980026503503, 424.2210760341397, 
420.3681613150685, 370.27879139238263, 447.5434903488937, 419.0657838858176, 
386.29825838198906, 432.58399633206153, 312.4255659558532, 623.0898727510249], 
"eval_len": [92, 77, 81, 67, 85, 79, 70, 79, 59, 109]}

 92%|█████████▏| 919997/1000000 [9:02:09<33:40, 39.59it/s]global step 920000, trans_decision ep_re 576.607258779915

{"global_step": 920000, "eval_re": [541.0296242669561, 232.00311464233903, 
463.3277655216575, 681.0464371452111, 241.38470898657516, 358.90637822568345, 
396.84664803852684, 831.5122789410732, 583.5220176917695, 1436.4936143393575], 
"eval_len": [104, 44, 91, 141, 45, 66, 71, 161, 110, 278]}

 93%|█████████▎| 929995/1000000 [9:08:23<29:11, 39.98it/s]global step 930000, trans_decision ep_re 317.3695330614834

{"global_step": 930000, "eval_re": [334.27580976733844, 375.8715352382004, 
311.42759649178487, 283.3271468932344, 323.71099287307464, 354.5199477789633, 
247.9959894188167, 337.9400704337218, 266.89198735055027, 337.7342543691492], 
"eval_len": [63, 70, 61, 52, 61, 65, 47, 65, 49, 62]}

 94%|█████████▍| 939997/1000000 [9:14:41<25:39, 38.97it/s]global step 940000, trans_decision ep_re 427.7463916305601

{"global_step": 940000, "eval_re": [230.5734018819074, 471.2991431437645, 
241.5410806724642, 226.52894930426103, 225.37843874889052, 306.02683920215924, 
204.87119546996118, 1746.141538963132, 398.08584868131385, 227.0174802377469], 
"eval_len": [44, 85, 46, 44, 43, 56, 40, 322, 77, 44]}

 95%|█████████▍| 949997/1000000 [9:20:49<25:32, 32.62it/s]global step 950000, trans_decision ep_re 547.4651366903156

{"global_step": 950000, "eval_re": [420.61810707454873, 283.0566229592417, 
309.35691587651763, 993.8597487157316, 2093.1794375161553, 268.1714776729627, 
225.96214191854213, 163.8491386124799, 470.7776633713385, 245.82011318563843], 
"eval_len": [81, 53, 58, 180, 394, 51, 43, 31, 89, 47]}

 96%|█████████▌| 959997/1000000 [9:26:59<16:42, 39.89it/s]global step 960000, trans_decision ep_re 397.91335624334494

{"global_step": 960000, "eval_re": [377.64425677445695, 301.99118694539, 
286.8531608496026, 355.57854568289895, 304.87058202426255, 267.0678935554532, 
1094.6810438190726, 250.4449090646105, 291.43640039979545, 448.56558331790677], 
"eval_len": [67, 57, 54, 65, 57, 51, 201, 48, 55, 78]}

 97%|█████████▋| 969995/1000000 [9:32:40<12:33, 39.84it/s]global step 970000, trans_decision ep_re 400.3361004587538

{"global_step": 970000, "eval_re": [379.0335333419231, 464.9983919389241, 
468.2469617611545, 206.0914937814574, 252.6294403344312, 300.893077386463, 
331.4927980078534, 277.243725016346, 480.9996861872977, 841.7318968316866], 
"eval_len": [74, 82, 82, 40, 48, 55, 61, 53, 85, 172]}

 98%|█████████▊| 979996/1000000 [9:38:20<08:16, 40.26it/s]global step 980000, trans_decision ep_re 444.1183242317778

{"global_step": 980000, "eval_re": [433.60295434480895, 416.11438518606013, 
339.8846498542212, 315.92757751194586, 954.5737644269301, 269.8911357526951, 
307.4170721647579, 354.6885101345829, 780.942656522264, 268.14053641951267], 
"eval_len": [85, 75, 67, 59, 168, 53, 58, 69, 154, 52]}

 99%|█████████▉| 989997/1000000 [9:44:15<04:19, 38.60it/s]global step 990000, trans_decision ep_re 720.7251094818679

{"global_step": 990000, "eval_re": [512.161254411568, 979.1370222213638, 
332.5054309924541, 282.10001805231576, 250.10112081001802, 460.1630741946588, 
567.9857445631557, 274.6541108825305, 2421.981410216563, 1126.4619084740516], 
"eval_len": [91, 187, 60, 53, 47, 83, 102, 52, 470, 230]}

100%|█████████▉| 999999/1000000 [9:50:17<00:00, 40.32it/s]global step 1000000, trans_decision ep_re 560.603040430459

{"global_step": 1000000, "eval_re": [286.84262776543375, 271.0275999575133, 
884.7234620732125, 1369.0103230054156, 367.02693932708945, 434.517985457778, 
433.05249043861255, 461.9411658636637, 421.9699533891525, 675.917857026719], 
"eval_len": [57, 51, 175, 267, 68, 79, 80, 81, 83, 133]}

100%|██████████| 1000000/1000000 [9:50:29<00:00, 28.23it/s]
