
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 4,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)'
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9996/1000000 [03:02<5:59:19, 45.92it/s]global step 10000, trans_decision ep_re 273.2322543491331

{"global_step": 10000, "eval_re": [263.5082972569427, 289.7367325025077, 
258.2662900075302, 308.721728517442, 267.60765401754315, 254.61673933697912, 
254.84619639055572, 273.35917093248526, 280.35616804582594, 281.30356648351864],
"eval_len": [52, 56, 52, 61, 50, 52, 51, 53, 55, 56]}

  2%|▏         | 19995/1000000 [08:50<5:57:17, 45.72it/s]global step 20000, trans_decision ep_re 398.0796367238835

{"global_step": 20000, "eval_re": [393.71898290103843, 430.5694391492232, 
383.0538204663548, 349.82801418298374, 347.68919733660755, 380.73460263004, 
449.0346199863068, 393.7777151559316, 401.14250441805183, 451.2474710122969], 
"eval_len": [71, 81, 70, 63, 65, 70, 88, 73, 74, 86]}

  3%|▎         | 29997/1000000 [14:32<7:56:18, 33.94it/s]global step 30000, trans_decision ep_re 415.8391837593772

{"global_step": 30000, "eval_re": [360.87493598796385, 365.3976234023858, 
447.3967008917221, 558.2105530384047, 437.32628698992033, 425.8834271609609, 
402.90465161937504, 350.6243203668758, 435.1457351411244, 374.62760299503924], 
"eval_len": [68, 67, 83, 104, 82, 79, 76, 66, 81, 70]}

  4%|▍         | 39999/1000000 [20:30<7:42:05, 34.63it/s]global step 40000, trans_decision ep_re 438.69292215079224

{"global_step": 40000, "eval_re": [376.8459386407205, 449.93619731466623, 
530.4404539446841, 480.0647038641144, 401.5087610817203, 491.97915186383386, 
451.42664902687335, 406.68367660759185, 212.5132121345681, 585.5304770291491], 
"eval_len": [69, 84, 98, 88, 76, 90, 83, 73, 42, 117]}

  5%|▍         | 49997/1000000 [26:00<8:08:06, 32.44it/s]global step 50000, trans_decision ep_re 471.20160336030693

{"global_step": 50000, "eval_re": [536.0465842051607, 631.059175847057, 
496.0807270960602, 406.37696800766383, 507.86945647623054, 513.0733192877841, 
427.1974291266933, 452.3224350200511, 326.8123428459089, 415.1775956904607], 
"eval_len": [99, 117, 91, 75, 107, 94, 79, 84, 63, 78]}

  6%|▌         | 59997/1000000 [30:59<5:48:50, 44.91it/s]global step 60000, trans_decision ep_re 401.3721201981755

{"global_step": 60000, "eval_re": [455.88373709884985, 353.76063809978126, 
378.2390772346322, 466.15724327867076, 469.29464109184653, 369.1293073899671, 
364.54615888156206, 452.9671942199935, 408.9818440203645, 294.7613606660875], 
"eval_len": [86, 76, 68, 85, 85, 67, 67, 83, 75, 55]}

  7%|▋         | 69997/1000000 [36:02<5:46:40, 44.71it/s]global step 70000, trans_decision ep_re 449.82948652092665

{"global_step": 70000, "eval_re": [434.25620266856157, 342.03381324619653, 
438.4998639394337, 505.4623849698838, 426.42094811944827, 465.652597850054, 
448.8767035175685, 438.85246086037915, 505.8758856082665, 492.3640044294748], 
"eval_len": [80, 62, 81, 94, 79, 88, 83, 81, 94, 92]}

  8%|▊         | 79997/1000000 [41:03<5:43:42, 44.61it/s]global step 80000, trans_decision ep_re 463.1841612155439

{"global_step": 80000, "eval_re": [459.15731037584646, 401.3162798954448, 
407.75331079177334, 567.3702058050912, 462.42503687405923, 449.1839739285026, 
509.4024841276769, 446.08586183231705, 413.46562087681616, 515.6815276479108], 
"eval_len": [85, 73, 75, 105, 85, 82, 94, 82, 76, 95]}

  9%|▉         | 89997/1000000 [46:08<5:40:12, 44.58it/s]global step 90000, trans_decision ep_re 449.10465808578374

{"global_step": 90000, "eval_re": [455.6461331386407, 471.94588730219107, 
498.3140004016494, 471.2946035600672, 415.5603217991779, 435.0511611799746, 
450.4948356157817, 387.2282085462937, 457.43491394790175, 448.0765153661595], 
"eval_len": [84, 88, 93, 87, 77, 80, 83, 72, 83, 82]}

 10%|▉         | 99997/1000000 [51:17<5:35:32, 44.70it/s]global step 100000, trans_decision ep_re 497.7318536509305

{"global_step": 100000, "eval_re": [449.8383664307496, 313.13795308148815, 
696.4495937617239, 522.0574608520192, 576.3346108950501, 511.47183956332714, 
460.0737271039275, 498.59430059057007, 493.3776380346096, 455.98304619583985], 
"eval_len": [83, 58, 146, 97, 108, 95, 85, 93, 106, 84]}

 11%|█         | 109996/1000000 [56:34<5:22:46, 45.96it/s]global step 110000, trans_decision ep_re 499.5571803001079

{"global_step": 110000, "eval_re": [372.40074600132266, 401.5150967922985, 
352.5474459946639, 526.9454136121702, 603.6945848339549, 481.7775295347726, 
764.0832774347092, 572.1753857339711, 364.8692499485182, 555.5630731146982], 
"eval_len": [70, 82, 68, 100, 115, 91, 145, 113, 74, 102]}

 12%|█▏        | 119996/1000000 [1:01:44<5:22:16, 45.51it/s]global step 120000, trans_decision ep_re 628.3036852275642

{"global_step": 120000, "eval_re": [659.3666315132228, 632.1182108329614, 
699.8388399952021, 499.5272991786773, 697.1480931566701, 633.6432905518275, 
714.1475343586208, 532.1675785937246, 564.3079860363146, 650.7713880584214], 
"eval_len": [119, 117, 126, 98, 126, 114, 136, 102, 103, 119]}

 13%|█▎        | 129996/1000000 [1:06:47<5:15:28, 45.96it/s]global step 130000, trans_decision ep_re 581.5702949468716

{"global_step": 130000, "eval_re": [639.3050507561225, 522.5615756477035, 
597.565915508747, 550.2425196105152, 607.4674196328627, 549.639885376812, 
383.6744806163291, 927.7785512966908, 603.2037528648217, 434.2637981581107], 
"eval_len": [119, 97, 112, 103, 114, 102, 69, 179, 113, 81]}

 14%|█▍        | 139998/1000000 [1:11:49<5:11:24, 46.03it/s]global step 140000, trans_decision ep_re 616.6650226482998

{"global_step": 140000, "eval_re": [742.2991076683286, 468.1017601417872, 
600.4391267894683, 898.9292356627392, 593.5958770340918, 481.68025973999755, 
747.9734407987337, 590.3216632743856, 436.8383756835153, 606.4713796899509], 
"eval_len": [135, 87, 112, 175, 109, 91, 154, 116, 81, 109]}

 15%|█▍        | 149998/1000000 [1:16:52<5:22:17, 43.96it/s]global step 150000, trans_decision ep_re 653.8185469658252

{"global_step": 150000, "eval_re": [597.7798733079935, 742.9157593250353, 
514.2816462786549, 529.4855437520965, 408.41278904608197, 695.8952489758389, 
748.0361815595767, 645.5025125084593, 721.6022517860436, 934.2736631184717], 
"eval_len": [120, 142, 95, 102, 74, 133, 141, 123, 152, 196]}

 16%|█▌        | 159998/1000000 [1:22:00<5:02:48, 46.23it/s]global step 160000, trans_decision ep_re 574.1649233772966

{"global_step": 160000, "eval_re": [350.7233835137759, 928.8054222799126, 
602.3049556417279, 700.3133808577804, 478.1357977486404, 504.6357363899772, 
478.8697370113297, 761.1154495647634, 511.70259178086707, 425.0427789841926], 
"eval_len": [65, 185, 126, 134, 103, 109, 101, 143, 108, 79]}

 17%|█▋        | 169996/1000000 [1:27:15<8:27:06, 27.28it/s]global step 170000, trans_decision ep_re 680.5340653081637

{"global_step": 170000, "eval_re": [1105.4873702948298, 816.432017190734, 
590.388624774324, 453.47931558744074, 452.0942531722495, 806.2768868452464, 
580.69038394235, 528.200066783345, 773.7380740214127, 698.5536604697049], 
"eval_len": [220, 173, 126, 97, 97, 170, 109, 112, 165, 148]}

 18%|█▊        | 179996/1000000 [1:32:18<5:00:15, 45.52it/s]global step 180000, trans_decision ep_re 561.7589513758143

{"global_step": 180000, "eval_re": [557.5711216588047, 471.33353208342385, 
480.59301963275334, 653.9531322818539, 551.7916574444083, 806.3101955764567, 
526.8173650891573, 436.3076969951974, 522.350969713311, 610.5608232827759], 
"eval_len": [106, 87, 89, 140, 109, 154, 98, 95, 101, 121]}

 19%|█▉        | 189996/1000000 [1:37:28<4:58:14, 45.27it/s]global step 190000, trans_decision ep_re 691.0267463251083

{"global_step": 190000, "eval_re": [679.6318819805351, 767.6037081163213, 
536.1282950817196, 666.0661093837716, 824.876626824957, 384.71902099028625, 
888.6141531126839, 1055.3257270904255, 471.9814875318047, 635.3204531385793], 
"eval_len": [143, 153, 105, 128, 176, 75, 174, 218, 103, 126]}

 20%|█▉        | 199996/1000000 [1:42:54<4:53:06, 45.49it/s]global step 200000, trans_decision ep_re 666.7655087400668

{"global_step": 200000, "eval_re": [350.82913629372007, 647.3708611152526, 
775.8465409182521, 798.72425529092, 592.9432109060718, 933.1201310083857, 
621.9531399868598, 648.4314051056202, 783.9273419216098, 514.5090648539757], 
"eval_len": [70, 130, 151, 151, 111, 181, 113, 126, 148, 109]}

 21%|██        | 209995/1000000 [1:48:04<4:52:58, 44.94it/s]global step 210000, trans_decision ep_re 731.7697226867941

{"global_step": 210000, "eval_re": [1093.084958628251, 665.7417871511323, 
581.8078654201281, 1115.9012833238544, 628.1008445210633, 744.6393669228693, 
692.9264614315947, 766.4673861073826, 584.0449584414833, 444.9823149201819], 
"eval_len": [219, 129, 112, 213, 130, 140, 132, 146, 115, 82]}

 22%|██▏       | 219998/1000000 [1:53:14<4:47:25, 45.23it/s]global step 220000, trans_decision ep_re 849.8564076113477

{"global_step": 220000, "eval_re": [797.2166484626551, 498.1575292955854, 
783.9281098005871, 561.7356682449142, 803.5715043003905, 998.9024025189173, 
1293.2583302381404, 828.0771146043288, 918.8973105102173, 1014.8194581377397], 
"eval_len": [149, 101, 149, 111, 153, 198, 245, 155, 186, 193]}

 23%|██▎       | 229996/1000000 [1:58:26<5:05:00, 42.08it/s]global step 230000, trans_decision ep_re 567.4135064500279

{"global_step": 230000, "eval_re": [731.9831182393284, 784.8912907625956, 
151.48755413762154, 644.0301468310084, 972.1068756591638, 224.50986587720885, 
397.92717536428825, 573.4124106178458, 669.2162095990284, 524.5704174121889], 
"eval_len": [149, 152, 29, 124, 192, 44, 86, 109, 141, 109]}

 24%|██▍       | 239997/1000000 [2:04:08<5:38:35, 37.41it/s]global step 240000, trans_decision ep_re 833.9353844050763

{"global_step": 240000, "eval_re": [935.1681339171937, 989.693911111455, 
850.4742253449796, 667.6361721161782, 1342.8320204576635, 472.95742852480095, 
803.2394895019538, 978.5891349815977, 460.2494856639025, 838.5138424310386], 
"eval_len": [177, 191, 162, 140, 261, 101, 152, 194, 86, 161]}

 25%|██▍       | 249995/1000000 [2:09:40<4:52:51, 42.68it/s]global step 250000, trans_decision ep_re 891.9067835919162

{"global_step": 250000, "eval_re": [495.90333021551265, 1395.9328487108276, 
718.6175837049626, 779.0605869670952, 650.1371542722549, 1250.5449008359285, 
947.0842897257016, 1200.6941403644623, 282.57608174784525, 1198.5169193745724], 
"eval_len": [90, 267, 147, 147, 134, 241, 182, 227, 53, 244]}

 26%|██▌       | 259999/1000000 [2:15:18<6:11:36, 33.19it/s]global step 260000, trans_decision ep_re 882.5825134783394

{"global_step": 260000, "eval_re": [756.0323048934998, 534.5684347673347, 
829.1873069451648, 416.11632401230503, 1032.0544102624503, 1472.792985249944, 
450.2231127054585, 1253.2281411633987, 852.3575055663051, 1229.2646092175328], 
"eval_len": [148, 114, 161, 84, 198, 296, 97, 248, 182, 243]}

 27%|██▋       | 269995/1000000 [2:21:16<4:49:02, 42.09it/s]global step 270000, trans_decision ep_re 1070.4877449368055

{"global_step": 270000, "eval_re": [753.7706873927931, 1241.9086063086152, 
465.01273227588257, 765.8943762716133, 1432.4705592097814, 1534.7760262231927, 
862.839769357531, 1242.2825063626372, 1196.4928399646828, 1209.4293460013264], 
"eval_len": [142, 234, 84, 154, 277, 294, 160, 242, 229, 224]}

 28%|██▊       | 279999/1000000 [2:26:56<5:48:03, 34.48it/s]global step 280000, trans_decision ep_re 804.5604879472783

{"global_step": 280000, "eval_re": [601.9256412111774, 988.2691039838293, 
1283.0694923914612, 463.8068823145387, 976.6900592539275, 801.2027103220726, 
1087.741194135036, 880.741820452215, 282.4504903618605, 679.7074850466631], 
"eval_len": [123, 184, 234, 86, 189, 153, 205, 166, 56, 125]}

 29%|██▉       | 289999/1000000 [2:32:00<5:33:26, 35.49it/s]global step 290000, trans_decision ep_re 912.9874944625046

{"global_step": 290000, "eval_re": [711.6971475631932, 1623.5910437980617, 
725.1249112864806, 1399.8266920163971, 459.7658425146858, 1247.0455126022455, 
578.8414510432888, 811.8953954567302, 888.3517290065198, 683.7352193374439], 
"eval_len": [136, 300, 134, 268, 92, 230, 111, 158, 166, 148]}

 30%|██▉       | 299998/1000000 [2:37:16<4:57:00, 39.28it/s]global step 300000, trans_decision ep_re 1130.9858449777978

{"global_step": 300000, "eval_re": [1733.1331041340281, 586.6429222008063, 
1235.0306005801951, 461.91184153896836, 1395.7957680031207, 1317.9615863745016, 
1838.8729941058436, 1096.40600971718, 591.6637665642482, 1052.439856559086], 
"eval_len": [365, 111, 250, 88, 277, 259, 351, 219, 112, 199]}

 31%|███       | 309995/1000000 [2:42:30<4:35:11, 41.79it/s]global step 310000, trans_decision ep_re 975.2267322375167

{"global_step": 310000, "eval_re": [978.2548424350873, 1354.9665961367907, 
1197.4707652794675, 994.2737334298938, 421.06359940336245, 1268.2618724743527, 
633.3654611865397, 1162.2745625128914, 818.3266686785703, 924.0092208382097], 
"eval_len": [182, 250, 221, 196, 82, 251, 120, 220, 150, 168]}

 32%|███▏      | 319998/1000000 [2:47:40<4:20:39, 43.48it/s]global step 320000, trans_decision ep_re 1144.3027172972322

{"global_step": 320000, "eval_re": [1823.6195941037297, 711.5858047495227, 
1284.8614419410544, 1436.8608259132152, 1193.331385143121, 871.1543563454903, 
1214.137823004788, 626.3447538611957, 949.5698167412837, 1331.561371168922], 
"eval_len": [364, 155, 248, 267, 243, 168, 223, 116, 185, 261]}

 33%|███▎      | 329997/1000000 [2:52:49<4:02:41, 46.01it/s]global step 330000, trans_decision ep_re 886.5881589868648

{"global_step": 330000, "eval_re": [304.38601654542015, 824.0559704053006, 
1769.1858053844903, 917.6278121580119, 575.5162933197394, 634.9947554509514, 
859.1755664957104, 570.8056519451794, 1067.376096243318, 1342.7576219205262], 
"eval_len": [68, 151, 326, 172, 106, 117, 161, 105, 204, 249]}

 34%|███▍      | 339996/1000000 [2:58:06<3:59:30, 45.93it/s]global step 340000, trans_decision ep_re 1555.521615277973

{"global_step": 340000, "eval_re": [1628.5606620255298, 618.8083902506665, 
2783.8257023322208, 640.5647519941506, 782.4981013478593, 2171.452457404027, 
1591.8560684469392, 1932.1636791606277, 2428.3761071016042, 977.1102327161047], 
"eval_len": [322, 121, 562, 123, 162, 425, 327, 389, 488, 183]}

 35%|███▍      | 349995/1000000 [3:03:06<4:13:09, 42.79it/s]global step 350000, trans_decision ep_re 820.8180804773638

{"global_step": 350000, "eval_re": [1304.7396856449973, 870.7092187165157, 
562.9336931537283, 1210.9671825264832, 874.461261202713, 790.9105693112311, 
766.4397393225346, 527.3355818698021, 693.5250160317268, 606.1588569939058], 
"eval_len": [238, 164, 105, 241, 162, 145, 141, 98, 128, 113]}

 36%|███▌      | 359995/1000000 [3:08:06<3:54:29, 45.49it/s]global step 360000, trans_decision ep_re 1231.6749351571907

{"global_step": 360000, "eval_re": [2423.745098493036, 132.86504379456855, 
626.5592718522664, 1126.3790362516042, 566.7834220647404, 2475.7384985088834, 
517.6956901444896, 1765.1429228086156, 1118.1481486134808, 1563.6922190402227], 
"eval_len": [480, 28, 117, 208, 107, 468, 98, 335, 217, 292]}

 37%|███▋      | 369996/1000000 [3:12:57<3:48:31, 45.95it/s]global step 370000, trans_decision ep_re 1337.1640723194553

{"global_step": 370000, "eval_re": [1800.1686565909129, 842.4322517301999, 
1809.1066101936165, 914.6808693812671, 875.296219162837, 1867.324905969779, 
1133.2148977675836, 1533.515690571616, 908.3801749673515, 1687.5204468593897], 
"eval_len": [342, 160, 365, 194, 159, 354, 238, 308, 186, 328]}

 38%|███▊      | 379999/1000000 [3:18:36<3:48:18, 45.26it/s]global step 380000, trans_decision ep_re 1162.4813033051232

{"global_step": 380000, "eval_re": [827.722172971916, 2567.6002074103885, 
928.1604031034107, 786.2401846113345, 1106.8574733934086, 934.2526761330527, 
991.1088148817188, 1503.2070859890694, 1277.0695479782917, 702.5944665786419], 
"eval_len": [155, 479, 170, 143, 207, 170, 197, 304, 232, 142]}

 39%|███▉      | 389997/1000000 [3:24:08<4:46:52, 35.44it/s]global step 390000, trans_decision ep_re 1387.683739871197

{"global_step": 390000, "eval_re": [1713.7488337619584, 1078.2674778031528, 
850.8801880037299, 606.9166601330294, 2736.7462773620487, 1358.974207235793, 
782.0721119687524, 1407.5958334234049, 2027.2534181012697, 1314.3823909188282], 
"eval_len": [328, 218, 178, 126, 533, 268, 151, 259, 388, 255]}

 40%|███▉      | 399999/1000000 [3:29:46<3:46:22, 44.17it/s]global step 400000, trans_decision ep_re 1213.4594546111698

{"global_step": 400000, "eval_re": [859.37442767918, 3577.283252731297, 
1507.1385380526212, 963.6306806993102, 94.21827197055765, 183.00265163735156, 
179.92220907107162, 642.4278250429553, 625.997809331822, 3501.5988798955327], 
"eval_len": [166, 684, 287, 184, 20, 35, 34, 122, 124, 659]}

 41%|████      | 409999/1000000 [3:35:26<4:46:41, 34.30it/s]global step 410000, trans_decision ep_re 1587.0464282085

{"global_step": 410000, "eval_re": [2238.816191337491, 2949.960116966754, 
910.098370328228, 813.5097000157572, 1396.236315275307, 2780.813205070733, 
2054.6093327629715, 1108.0507331794827, 634.3128684267032, 984.0574487215724], 
"eval_len": [427, 575, 169, 153, 271, 527, 402, 212, 117, 192]}

 42%|████▏     | 419996/1000000 [3:41:06<3:27:55, 46.49it/s]global step 420000, trans_decision ep_re 1391.749518958996

{"global_step": 420000, "eval_re": [1587.6947217821298, 1937.9589619434978, 
1849.3671418600989, 1375.6891297773159, 949.6421430265041, 907.1988181642556, 
2521.406345936928, 900.0406998303155, 694.6744030023378, 1193.822824266577], 
"eval_len": [307, 371, 344, 265, 174, 181, 488, 181, 125, 216]}

 43%|████▎     | 429995/1000000 [3:46:17<3:46:30, 41.94it/s]global step 430000, trans_decision ep_re 1197.4126323631403

{"global_step": 430000, "eval_re": [2485.2905421734367, 1079.2238284529878, 
2603.784017371663, 288.145105435769, 936.6705651310544, 685.4525927967838, 
665.1870572202378, 553.9170856309615, 805.8850132132061, 1870.5705162053014], 
"eval_len": [478, 202, 509, 53, 178, 129, 131, 104, 169, 351]}

 44%|████▍     | 439998/1000000 [3:51:46<3:40:32, 42.32it/s]global step 440000, trans_decision ep_re 720.9898686036993

{"global_step": 440000, "eval_re": [738.6308260911813, 630.5190925989389, 
866.3298729012336, 621.175187072174, 739.4822671884899, 753.931286233058, 
581.2483593682095, 735.3503715289295, 769.795903997797, 773.4355190569809], 
"eval_len": [135, 117, 159, 114, 136, 138, 107, 143, 143, 145]}

 45%|████▍     | 449995/1000000 [3:57:26<3:38:29, 41.95it/s]global step 450000, trans_decision ep_re 1762.5137905649106

{"global_step": 450000, "eval_re": [1356.8119983292581, 5159.4481279758475, 
773.1136449400356, 727.5585598562743, 2641.7229119803865, 1144.9441439127738, 
580.9148048705359, 659.4210026680408, 3345.2289269853104, 1235.9737841306421], 
"eval_len": [267, 1000, 149, 145, 532, 228, 124, 131, 666, 237]}

 46%|████▌     | 459996/1000000 [4:02:46<3:19:56, 45.01it/s]global step 460000, trans_decision ep_re 1241.7340458557585

{"global_step": 460000, "eval_re": [1353.5996516425284, 561.0666739279653, 
552.536086718621, 1110.3984116479746, 546.12645602788, 1814.1809303006783, 
1713.2987329573298, 1579.7674617494754, 1787.6217266990238, 1398.744326886108], 
"eval_len": [265, 102, 109, 221, 117, 354, 317, 309, 350, 271]}

 47%|████▋     | 469996/1000000 [4:08:36<3:26:34, 42.76it/s]global step 470000, trans_decision ep_re 2577.6588995086304

{"global_step": 470000, "eval_re": [2001.551694935037, 3181.7408324590847, 
1131.0133186427252, 5045.241009167185, 2483.758428848145, 3332.947230052149, 
2337.241927150334, 2404.8788081969888, 1070.4189510735741, 2787.796794561085], 
"eval_len": [380, 616, 221, 1000, 479, 648, 464, 450, 193, 550]}

 48%|████▊     | 479997/1000000 [4:14:01<3:27:17, 41.81it/s]global step 480000, trans_decision ep_re 938.2454878847933

{"global_step": 480000, "eval_re": [1107.8777046877783, 1162.6081523932808, 
1296.2500711653167, 480.1378847727894, 1193.5418063674458, 815.5578891165661, 
588.5451532263587, 833.8112802048461, 1305.4768356693469, 598.6481012442038], 
"eval_len": [227, 228, 251, 94, 226, 153, 117, 174, 255, 115]}

 49%|████▉     | 489997/1000000 [4:19:27<3:23:11, 41.83it/s]global step 490000, trans_decision ep_re 1134.9667840614127

{"global_step": 490000, "eval_re": [1233.6089220366064, 2746.5053797665064, 
947.0269549087543, 863.1172272466603, 538.0908200696136, 1073.4222778789333, 
349.0119106826861, 571.3252016985384, 1009.1752531271804, 2018.3838931986486], 
"eval_len": [233, 521, 187, 175, 108, 207, 74, 112, 210, 389]}

 50%|████▉     | 499997/1000000 [4:25:06<3:13:04, 43.16it/s]global step 500000, trans_decision ep_re 1924.3400974550118

{"global_step": 500000, "eval_re": [1915.0061710417233, 1968.122227011623, 
1092.2475740201323, 2940.9798332008395, 3496.935537612154, 694.8567595219716, 
1667.3319160015076, 1013.511447144952, 1488.3373703392574, 2966.0721386559553], 
"eval_len": [360, 381, 215, 567, 661, 129, 313, 192, 284, 582]}

 51%|█████     | 509995/1000000 [4:30:46<3:16:37, 41.53it/s]global step 510000, trans_decision ep_re 1418.6346777438307

{"global_step": 510000, "eval_re": [1603.3393702781098, 803.6772907673433, 
935.1245987215692, 1483.5354558121896, 705.7513474477197, 3656.4124029304335, 
1136.3495959710692, 924.3497305299863, 2498.5743825807226, 439.23260239916414], 
"eval_len": [302, 163, 176, 289, 131, 689, 209, 172, 471, 87]}

 52%|█████▏    | 519998/1000000 [4:36:16<3:09:17, 42.26it/s]global step 520000, trans_decision ep_re 1680.6019399779639

{"global_step": 520000, "eval_re": [870.6461089835724, 1693.7165512896036, 
3972.083306690932, 1437.683130076063, 1971.331028149866, 873.0641564716657, 
977.4431353983549, 1016.4186315545055, 1072.0989065068488, 2921.534444658224], 
"eval_len": [167, 328, 768, 287, 397, 167, 193, 194, 220, 567]}

 53%|█████▎    | 529997/1000000 [4:41:46<3:07:21, 41.81it/s]global step 530000, trans_decision ep_re 2005.287476099066

{"global_step": 530000, "eval_re": [1709.1920020504074, 2909.7775973842877, 
461.64704920714615, 1185.375633936111, 5270.984003066206, 2912.856239398287, 
1170.661469122523, 1264.2522213783122, 1909.219372787389, 1258.9091726599904], 
"eval_len": [332, 565, 87, 232, 1000, 569, 218, 249, 371, 248]}

 54%|█████▍    | 539995/1000000 [4:47:26<2:57:56, 43.08it/s]global step 540000, trans_decision ep_re 2116.993876119237

{"global_step": 540000, "eval_re": [4367.50947221147, 1904.6109741424486, 
1297.9845116156087, 2583.9425173610593, 1265.926475679055, 1409.890337127467, 
2212.6715654386544, 1292.4811139185217, 1582.4446308004533, 3252.4771628976355],
"eval_len": [845, 369, 264, 498, 242, 274, 430, 258, 303, 610]}

 55%|█████▍    | 549997/1000000 [4:52:56<3:01:33, 41.31it/s]global step 550000, trans_decision ep_re 3349.313366497

{"global_step": 550000, "eval_re": [5403.224678739229, 578.8666279774809, 
4339.86388481662, 3727.4812293400228, 5310.721279867113, 1197.8504487121465, 
1920.9436875199663, 5170.951026386593, 2362.485654199532, 3480.745147411299], 
"eval_len": [1000, 125, 816, 722, 1000, 234, 361, 1000, 435, 637]}

 56%|█████▌    | 559995/1000000 [4:58:37<2:56:32, 41.54it/s]global step 560000, trans_decision ep_re 2584.583484493128

{"global_step": 560000, "eval_re": [362.9113949759766, 2371.8857948770915, 
2143.9923154499365, 1362.6168057541051, 4723.801080690874, 645.8973195481781, 
5149.2177270751845, 1215.034403075925, 2702.4195405354394, 5168.058462948562], 
"eval_len": [65, 435, 404, 265, 878, 119, 1000, 231, 529, 1000]}

 57%|█████▋    | 569995/1000000 [5:04:17<2:53:05, 41.40it/s]global step 570000, trans_decision ep_re 1566.9340119032945

{"global_step": 570000, "eval_re": [1273.5402861010132, 609.9966087617076, 
4297.001032254568, 1347.262496769819, 1874.3687845842103, 742.5966876223289, 
1243.9618474504177, 808.6453322811475, 1251.554890306136, 2220.4121529015956], 
"eval_len": [237, 114, 771, 247, 334, 141, 226, 156, 233, 402]}

 58%|█████▊    | 579999/1000000 [5:09:47<2:40:33, 43.60it/s]global step 580000, trans_decision ep_re 3013.1731066649004

{"global_step": 580000, "eval_re": [1778.2786494953282, 4217.284752550262, 
3178.15168484928, 3094.841165580556, 5339.703784182905, 1779.5112058933885, 
5272.554314117965, 2193.459286536014, 2467.304518267219, 810.6417051760861], 
"eval_len": [376, 799, 583, 590, 992, 345, 1000, 428, 467, 157]}

 59%|█████▉    | 589995/1000000 [5:15:27<2:43:20, 41.84it/s]global step 590000, trans_decision ep_re 2173.4577102377743

{"global_step": 590000, "eval_re": [656.9200553798732, 507.84918291168157, 
5428.156137651428, 3075.7385138066784, 1889.6339068083237, 3083.8091082602928, 
1916.6893671760463, 1076.9396400281835, 3674.4324663808807, 424.4087239743518], 
"eval_len": [131, 105, 1000, 652, 349, 580, 353, 203, 694, 87]}

 60%|█████▉    | 599998/1000000 [5:21:07<2:39:23, 41.82it/s]global step 600000, trans_decision ep_re 1723.8893653405808

{"global_step": 600000, "eval_re": [1557.6541734800398, 2258.15428720227, 
754.4338125094953, 691.8694185213686, 2574.6817765692717, 680.4174224123162, 
2568.2828998640234, 550.6690211731793, 3947.6159582729574, 1655.114883400886], 
"eval_len": [319, 485, 157, 130, 496, 136, 484, 115, 789, 320]}

 61%|██████    | 609998/1000000 [5:26:37<2:33:06, 42.46it/s]global step 610000, trans_decision ep_re 2871.0733744869103

{"global_step": 610000, "eval_re": [535.9669467797063, 2113.8537067562124, 
389.6466814004941, 5263.817243633415, 5044.916199987363, 1194.7646899438457, 
5335.916889186279, 406.1783908821327, 5298.41209733437, 3127.2608989652845], 
"eval_len": [112, 390, 73, 1000, 960, 241, 1000, 76, 1000, 610]}

 62%|██████▏   | 619995/1000000 [5:32:17<2:31:01, 41.93it/s]global step 620000, trans_decision ep_re 2222.8158868355567

{"global_step": 620000, "eval_re": [2717.563394834137, 1678.8344653632464, 
2248.9950446560997, 495.71172783987976, 322.50836437932844, 1414.374402725901, 
3915.722389017748, 3974.9122224433295, 2319.597248205536, 3139.93960889036], 
"eval_len": [509, 355, 432, 99, 62, 290, 723, 770, 445, 602]}

 63%|██████▎   | 629999/1000000 [5:37:57<2:28:34, 41.51it/s]global step 630000, trans_decision ep_re 1889.8814411197102

{"global_step": 630000, "eval_re": [990.8400070332324, 1769.3731631534527, 
5175.895811570804, 611.3419191223753, 476.5121404574635, 2378.054477281974, 
1603.0893792711508, 352.39612863845724, 4588.538467060833, 952.7729176073597], 
"eval_len": [186, 328, 1000, 126, 83, 439, 298, 68, 836, 182]}

 64%|██████▍   | 639995/1000000 [5:43:27<2:24:22, 41.56it/s]global step 640000, trans_decision ep_re 2619.0959220452146

{"global_step": 640000, "eval_re": [5431.915960162384, 1233.2134707375087, 
1411.3150027772995, 1874.7900677716273, 3159.401906509506, 3822.1831383286694, 
603.6561386458363, 2177.823548781072, 1064.029205175946, 5412.630781562299], 
"eval_len": [1000, 225, 269, 342, 609, 752, 121, 404, 225, 1000]}

 65%|██████▍   | 649995/1000000 [5:49:07<2:19:05, 41.94it/s]global step 650000, trans_decision ep_re 2279.6101654194326

{"global_step": 650000, "eval_re": [1991.8150748390492, 939.5148527463083, 
1628.0962087145501, 4457.027380063069, 969.6003875760213, 699.8654948603815, 
5551.733509051096, 2379.8795756776112, 1694.870098262405, 2483.6990724038374], 
"eval_len": [354, 171, 297, 772, 190, 129, 1000, 436, 308, 443]}

 66%|██████▌   | 659999/1000000 [5:54:37<2:16:22, 41.55it/s]global step 660000, trans_decision ep_re 2601.6898908326893

{"global_step": 660000, "eval_re": [2641.3196043745093, 3817.1666860898026, 
1843.9344244672627, 5082.815669156367, 1579.2415758531565, 3662.9606408116483, 
994.0809517690791, 1073.264086612008, 1780.6842098821012, 3541.4310593109576], 
"eval_len": [531, 690, 342, 917, 288, 660, 184, 196, 342, 655]}

 67%|██████▋   | 669995/1000000 [6:00:18<2:07:27, 43.15it/s]global step 670000, trans_decision ep_re 2219.562563164908

{"global_step": 670000, "eval_re": [3081.8954909283607, 3040.5895987287977, 
5448.379551525693, 1034.9068753211395, 578.4302626316496, 2992.988105579585, 
1297.2345951985894, 936.0261929615571, 3474.236570385549, 310.9383883881553], 
"eval_len": [585, 561, 1000, 197, 110, 542, 236, 183, 619, 56]}

 68%|██████▊   | 679997/1000000 [6:05:58<2:07:34, 41.81it/s]global step 680000, trans_decision ep_re 2611.0992578077216

{"global_step": 680000, "eval_re": [5363.26471719892, 1597.5699253483745, 
414.03083398845246, 2803.2297401456162, 1854.8317993882854, 1875.1662633137885, 
1419.3610618659627, 4496.286550120433, 3758.408266120316, 2528.843420587068], 
"eval_len": [1000, 312, 78, 524, 341, 337, 261, 811, 693, 478]}

 69%|██████▉   | 689995/1000000 [6:11:28<2:03:13, 41.93it/s]global step 690000, trans_decision ep_re 1439.7126899399323

{"global_step": 690000, "eval_re": [770.9240043992264, 1434.349451721318, 
1084.0627039952205, 960.2501147841975, 1658.2396907716295, 3308.8379422173857, 
1255.121144682257, 1384.578776471675, 1488.5860911543275, 1052.1769792020864], 
"eval_len": [152, 267, 199, 181, 317, 606, 224, 258, 268, 191]}

 70%|██████▉   | 699997/1000000 [6:16:58<1:59:42, 41.77it/s]global step 700000, trans_decision ep_re 2407.225123015716

{"global_step": 700000, "eval_re": [5558.069572061895, 558.7265160630033, 
1068.882552428077, 1095.4595243695646, 4748.60684450478, 3432.6055119515554, 
2930.8119284520767, 325.4532903915891, 3681.130903211788, 672.5045867228284], 
"eval_len": [1000, 120, 192, 208, 871, 647, 540, 62, 667, 121]}

 71%|███████   | 709995/1000000 [6:22:38<1:51:22, 43.40it/s]global step 710000, trans_decision ep_re 2314.2236209510384

{"global_step": 710000, "eval_re": [5721.040895022644, 1437.9504440971068, 
666.1707944745306, 3928.0736328583985, 1993.3537974920048, 666.5862531400797, 
2066.6760531995074, 4172.634551584221, 579.0123884162673, 1910.7373992256248], 
"eval_len": [1000, 267, 120, 689, 402, 118, 389, 756, 108, 339]}

 72%|███████▏  | 719996/1000000 [6:28:08<1:50:08, 42.37it/s]global step 720000, trans_decision ep_re 2860.351908431291

{"global_step": 720000, "eval_re": [2463.7252169844996, 1592.6835532916045, 
2240.1494652295514, 2565.2843026631294, 4797.422066418963, 3868.1988835983834, 
4257.468985899996, 669.3982268484427, 3490.953190083139, 2658.235193295199], 
"eval_len": [436, 287, 386, 444, 847, 665, 746, 119, 594, 484]}

 73%|███████▎  | 729995/1000000 [6:33:48<1:47:22, 41.91it/s]global step 730000, trans_decision ep_re 2545.2344435295868

{"global_step": 730000, "eval_re": [3599.4745177212562, 3827.011635780856, 
1716.4592382564297, 296.17728620175876, 3735.727293381896, 726.0928741543562, 
1545.1727594732818, 3759.6213393264156, 5630.419852646424, 616.1876383531911], 
"eval_len": [641, 703, 327, 54, 670, 127, 289, 665, 1000, 126]}

 74%|███████▍  | 739998/1000000 [6:39:28<1:42:35, 42.24it/s]global step 740000, trans_decision ep_re 1837.3929689672962

{"global_step": 740000, "eval_re": [1389.060110882518, 884.9265642595983, 
1622.8973098411434, 2623.974391263378, 2080.835140658554, 2079.708689320567, 
988.7402730591941, 3174.2363933773354, 1900.3111929908243, 1629.2396240198511], 
"eval_len": [259, 154, 296, 483, 373, 387, 179, 586, 341, 290]}

 75%|███████▍  | 749995/1000000 [6:44:58<1:35:46, 43.51it/s]global step 750000, trans_decision ep_re 2962.250198023491

{"global_step": 750000, "eval_re": [4012.8840414485962, 2080.8602876652394, 
1173.0792967985844, 486.7166671568249, 5072.790481538826, 5399.65079123483, 
1984.5155244349821, 5350.6186792235485, 1040.2177641327753, 3021.1684466007027],
"eval_len": [726, 387, 234, 89, 927, 1000, 368, 1000, 190, 566]}

 76%|███████▌  | 759995/1000000 [6:50:38<1:35:52, 41.72it/s]global step 760000, trans_decision ep_re 3215.620205782113

{"global_step": 760000, "eval_re": [1146.3801756764026, 5674.201307396339, 
2231.558068556908, 3457.796002382886, 2177.1827647729006, 4430.398389734944, 
760.9873231653756, 5772.688453731938, 2819.4833910172274, 3685.526181386208], 
"eval_len": [205, 1000, 406, 611, 386, 774, 141, 1000, 496, 673]}

 77%|███████▋  | 769998/1000000 [6:56:18<1:31:22, 41.95it/s]global step 770000, trans_decision ep_re 1819.1125857646723

{"global_step": 770000, "eval_re": [1810.467609626866, 1849.7994411786547, 
2345.153783555952, 1977.880290258058, 3825.5366124165585, 1530.5318822574318, 
2758.986687346989, 910.3554786907054, 752.977735004486, 429.4363373110207], 
"eval_len": [323, 334, 415, 357, 676, 281, 501, 171, 148, 81]}

 78%|███████▊  | 779995/1000000 [7:01:48<1:27:29, 41.91it/s]global step 780000, trans_decision ep_re 1463.0102008147305

{"global_step": 780000, "eval_re": [1053.9353455617463, 689.1954622627517, 
917.0109662534205, 2150.085830724158, 1597.73706757955, 978.8349653104543, 
3449.7818657127923, 1284.7175875931796, 826.3013469193749, 1682.5015702298788], 
"eval_len": [201, 135, 170, 404, 314, 192, 635, 235, 148, 315]}

 79%|███████▉  | 789998/1000000 [7:07:19<1:17:53, 44.93it/s]global step 790000, trans_decision ep_re 2021.9656810761965

{"global_step": 790000, "eval_re": [674.5397472174601, 3604.6140894185382, 
1774.275990971441, 2080.3130472345015, 2579.017963458561, 644.4778859247144, 
457.90907277870406, 1912.6541916868218, 3195.000665240288, 3296.8541568309333], 
"eval_len": [119, 652, 356, 373, 486, 120, 91, 336, 603, 581]}

 80%|███████▉  | 799995/1000000 [7:12:59<1:19:12, 42.09it/s]global step 800000, trans_decision ep_re 2550.3040856157463

{"global_step": 800000, "eval_re": [2724.2388231004275, 993.4682567401425, 
5538.273682048879, 1918.3848649518577, 1543.538751707965, 1271.0324800156761, 
5035.849086760617, 2145.6366121087726, 3717.0672917800916, 615.5510069430355], 
"eval_len": [508, 183, 1000, 344, 276, 229, 920, 390, 664, 111]}

 81%|████████  | 809996/1000000 [7:18:29<1:15:26, 41.97it/s]global step 810000, trans_decision ep_re 1607.1257119438171

{"global_step": 810000, "eval_re": [3099.8882450775513, 2188.549879629667, 
3150.8794165969557, 655.2407799611866, 901.3423170654827, 2253.9018660616284, 
1080.2245101982985, 1169.5647783911054, 608.9446057572286, 962.7207206990685], 
"eval_len": [541, 406, 571, 125, 158, 397, 198, 220, 109, 187]}

 82%|████████▏ | 819999/1000000 [7:24:09<1:11:57, 41.69it/s]global step 820000, trans_decision ep_re 2910.896406408535

{"global_step": 820000, "eval_re": [371.19385328566995, 672.1200823781019, 
2422.7463970952494, 1437.0727523380162, 4205.493790501822, 3045.8574067945237, 
3290.6380428270854, 4127.706331087839, 4668.62469238595, 4867.510715391088], 
"eval_len": [75, 126, 437, 269, 775, 578, 613, 768, 866, 904]}

 83%|████████▎ | 829998/1000000 [7:29:39<1:02:55, 45.03it/s]global step 830000, trans_decision ep_re 3002.165216416253

{"global_step": 830000, "eval_re": [2563.3981881942786, 2661.3856762006135, 
1301.7958505709787, 3842.146959708004, 5400.333683446825, 2220.784287625445, 
2673.84621243424, 4084.5845139067674, 1515.8765862231378, 3757.5002058522405], 
"eval_len": [489, 481, 237, 692, 1000, 414, 495, 755, 281, 700]}

 84%|████████▍ | 839997/1000000 [7:35:29<1:03:50, 41.77it/s]global step 840000, trans_decision ep_re 2450.6730551008377

{"global_step": 840000, "eval_re": [4331.59648078637, 3171.9400955548276, 
3870.9044386062446, 3122.256773828346, 895.177344664691, 5424.058375361714, 
745.2835133942499, 552.3799199923867, 503.1909516494986, 1889.9426571700506], 
"eval_len": [796, 579, 712, 588, 160, 1000, 139, 104, 94, 346]}

 85%|████████▍ | 849995/1000000 [7:40:59<59:31, 42.00it/s]global step 850000, trans_decision ep_re 2179.6781103449016

{"global_step": 850000, "eval_re": [5225.480462063993, 735.4869199881638, 
2281.00250715518, 1852.551398275482, 5722.789021272505, 523.1716777090862, 
3547.450229710866, 870.1598460104545, 490.53726954891476, 548.1517717143731], 
"eval_len": [931, 135, 410, 337, 1000, 100, 623, 159, 94, 101]}

 86%|████████▌ | 859997/1000000 [7:46:39<55:57, 41.70it/s]global step 860000, trans_decision ep_re 1771.403826253476

{"global_step": 860000, "eval_re": [3285.505852177556, 1682.211984975025, 
1056.159820735857, 1067.2117695169486, 3070.5780215347654, 2579.7640930704406, 
2816.8846262185184, 692.2663951566553, 236.6507347333642, 1226.8049644156301], 
"eval_len": [563, 287, 197, 187, 534, 451, 484, 124, 45, 230]}

 87%|████████▋ | 869995/1000000 [7:52:09<51:44, 41.88it/s]global step 870000, trans_decision ep_re 2546.949437049481

{"global_step": 870000, "eval_re": [1702.100914877949, 5921.341674320174, 
961.232662448673, 3714.693144859547, 2995.525973666689, 1739.4584339178414, 
1551.4780018872805, 3579.8170169711584, 2599.2034162120376, 704.6431313334618], 
"eval_len": [318, 1000, 163, 642, 517, 308, 274, 629, 449, 133]}

 88%|████████▊ | 879995/1000000 [7:57:49<47:52, 41.78it/s]global step 880000, trans_decision ep_re 3200.293298106431

{"global_step": 880000, "eval_re": [5527.646370787929, 5496.535282176382, 
844.0587710701532, 1445.7377360916432, 881.7594100609734, 5546.391883856927, 
3374.322949204128, 2168.26697688862, 1168.1030610823725, 5550.110539845184], 
"eval_len": [1000, 1000, 152, 270, 164, 1000, 631, 401, 222, 1000]}

 89%|████████▉ | 889995/1000000 [8:03:29<44:02, 41.63it/s]global step 890000, trans_decision ep_re 3080.418132481473

{"global_step": 890000, "eval_re": [1299.7834286657846, 5465.38403822615, 
3122.5699075900397, 5603.573858295088, 2891.3544709299676, 2380.453009704953, 
1183.8361855767512, 1405.8151613070966, 4359.765917421402, 3091.645347097501], 
"eval_len": [236, 1000, 570, 1000, 520, 432, 217, 257, 779, 574]}

 90%|████████▉ | 899995/1000000 [8:09:09<39:47, 41.88it/s]global step 900000, trans_decision ep_re 2917.121321521852

{"global_step": 900000, "eval_re": [5362.828649951036, 410.50048267273627, 
817.3566863321527, 2455.8850781397537, 504.36853466305564, 4571.25983672817, 
5396.445194720979, 3206.826457960826, 1177.9691854569644, 5267.773108592852], 
"eval_len": [1000, 77, 155, 459, 106, 860, 1000, 601, 223, 1000]}

 91%|█████████ | 909997/1000000 [8:14:49<35:33, 42.18it/s]global step 910000, trans_decision ep_re 1551.736964260333

{"global_step": 910000, "eval_re": [1132.7821444770948, 1065.8062598603503, 
746.6201231550133, 668.2394123566299, 899.2002856348832, 4253.132337042676, 
2796.8495009672774, 1597.3611316892639, 1045.9809544741354, 1311.397492946007], 
"eval_len": [217, 190, 148, 121, 163, 765, 509, 300, 186, 232]}

 92%|█████████▏| 919995/1000000 [8:20:19<30:43, 43.39it/s]global step 920000, trans_decision ep_re 3174.819076454047

{"global_step": 920000, "eval_re": [3182.9916420743657, 5381.109643190938, 
579.8820642368714, 3527.3229159574425, 2515.530288011102, 5476.469504433956, 
866.3884150372803, 5497.4473568457615, 1386.5323404461938, 3334.516594306557], 
"eval_len": [595, 974, 107, 640, 454, 991, 161, 1000, 247, 613]}

 93%|█████████▎| 929995/1000000 [8:26:09<27:49, 41.93it/s]global step 930000, trans_decision ep_re 2751.9265518793054

{"global_step": 930000, "eval_re": [5714.236058702845, 597.5993970784733, 
553.8308134525502, 5620.816032491603, 2194.6285359290378, 1221.5257449641226, 
3868.1264895549116, 1330.7985089250492, 5599.963887736793, 817.740049957668], 
"eval_len": [1000, 111, 110, 987, 407, 234, 700, 265, 1000, 157]}

 94%|█████████▍| 939998/1000000 [8:31:39<23:46, 42.07it/s]global step 940000, trans_decision ep_re 2503.7361043144315

{"global_step": 940000, "eval_re": [5373.273871172293, 227.66210727718243, 
2014.8085026091298, 3632.8208685166496, 1324.269301309483, 839.6247683969476, 
1614.6285247460953, 4992.142923952798, 4408.848319542041, 609.2818556216943], 
"eval_len": [918, 43, 358, 629, 231, 155, 298, 876, 766, 119]}

 95%|█████████▍| 949995/1000000 [8:37:20<19:59, 41.70it/s]global step 950000, trans_decision ep_re 2032.4928698713825

{"global_step": 950000, "eval_re": [401.03797907893454, 528.7987674558827, 
1761.6786222144585, 1082.6305953676626, 4294.871120506514, 558.3048969496363, 
1271.875260362515, 4033.59946924388, 5696.705332209557, 695.4266553247888], 
"eval_len": [75, 96, 315, 185, 747, 98, 239, 708, 1000, 125]}

 96%|█████████▌| 959999/1000000 [8:42:50<15:00, 44.40it/s]global step 960000, trans_decision ep_re 2855.172132068955

{"global_step": 960000, "eval_re": [732.7726566252376, 5725.913052714577, 
2119.0313274946307, 2514.2548187372404, 5140.9955284813805, 1429.5504570069172, 
1144.0298309009986, 1609.1135806407, 5772.258567493048, 2363.801500594816], 
"eval_len": [131, 996, 391, 453, 883, 257, 206, 296, 1000, 404]}

 97%|█████████▋| 969995/1000000 [8:48:30<13:04, 38.25it/s]global step 970000, trans_decision ep_re 3028.0230606260475

{"global_step": 970000, "eval_re": [5880.27241994879, 2789.887113102773, 
5842.444222100434, 2952.330152733018, 782.8995659055247, 4513.1523580194125, 
836.0487701841945, 893.3293403536494, 1647.878897517062, 4141.987766395619], 
"eval_len": [1000, 489, 1000, 516, 147, 768, 147, 160, 283, 704]}

 98%|█████████▊| 979995/1000000 [8:54:10<07:59, 41.73it/s]global step 980000, trans_decision ep_re 2321.2540106604883

{"global_step": 980000, "eval_re": [2583.279466751602, 2682.599807497159, 
3100.609560373037, 1088.4388074544418, 920.4209739029861, 1576.024620449721, 
1083.1483355609694, 887.3934258418259, 5893.85346723899, 3396.7716415341542], 
"eval_len": [449, 455, 540, 186, 158, 291, 191, 157, 1000, 571]}

 99%|█████████▉| 989999/1000000 [8:59:50<03:59, 41.83it/s]global step 990000, trans_decision ep_re 2670.4744859741872

{"global_step": 990000, "eval_re": [5722.500028532329, 3246.6743080711826, 
1709.3258164181316, 3582.1355633753615, 1061.977195028819, 1835.580387845529, 
1046.2081714145968, 1461.3416483886188, 5703.738784117388, 1335.2629565499146], 
"eval_len": [1000, 575, 302, 652, 194, 339, 193, 259, 1000, 262]}

100%|█████████▉| 999997/1000000 [9:05:40<00:00, 41.42it/s]global step 1000000, trans_decision ep_re 2869.5456781670723

{"global_step": 1000000, "eval_re": [1590.8991504307119, 3890.9713026851446, 
960.471332749055, 5864.247965715923, 1346.7736075201387, 2312.2907979935717, 
1777.6484852227943, 328.9609300735482, 5853.4367857783145, 4769.756423501519], 
"eval_len": [276, 653, 170, 1000, 235, 394, 301, 65, 1000, 809]}

100%|██████████| 1000000/1000000 [9:05:47<00:00, 30.54it/s]
