
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.1
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [05:30<13:04:39, 21.03it/s]global step 10000, trans_decision ep_re 292.2716456163188

{"global_step": 10000, "eval_re": [513.0897829380714, 439.1344941695165, 
390.88573435477383, 29.517058460890894, 33.595933906385284, 426.9310434264581, 
512.3902890853151, 111.9737461051241, 414.7616085024894, 50.43676521416362], 
"eval_len": [1000, 1000, 1000, 53, 39, 1000, 1000, 111, 1000, 80]}

  2%|▏         | 19999/1000000 [16:30<12:57:29, 21.01it/s]global step 20000, trans_decision ep_re 598.2888012611097

{"global_step": 20000, "eval_re": [687.7461923432071, 698.3475385293497, 
644.9661547001006, 378.679171450653, 436.98111301102745, 650.5896849257125, 
273.29764080959984, 727.54212026059, 687.4662698791792, 797.272126701679], 
"eval_len": [1000, 1000, 1000, 406, 665, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29998/1000000 [27:40<13:14:19, 20.35it/s]global step 30000, trans_decision ep_re 626.5320243012644

{"global_step": 30000, "eval_re": [931.4580893377832, 28.491400607321356, 
144.79156848379654, 762.9061881933062, 909.2621968923936, 1007.2552012941475, 
371.50657170339394, 390.2177548148355, 888.6893377784443, 830.741933907221], 
"eval_len": [1000, 43, 170, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39999/1000000 [38:50<12:56:29, 20.61it/s]global step 40000, trans_decision ep_re 475.40488515320067

{"global_step": 40000, "eval_re": [409.9554780833099, 701.4051666528628, 
954.742582118787, 94.52001242370851, 131.22254156115034, 755.2423587185892, 
693.2805146058247, 62.62434581123711, 130.44406009147045, 820.6117914650671], 
"eval_len": [381, 760, 1000, 100, 130, 686, 1000, 60, 128, 1000]}

  5%|▍         | 49998/1000000 [49:50<12:25:38, 21.23it/s]global step 50000, trans_decision ep_re 383.7627075853028

{"global_step": 50000, "eval_re": [325.25881810997123, 55.942052392270575, 
900.0449595536802, 78.53256638929837, 773.9476835013666, 132.69266817578577, 
441.43453413293963, 267.55097942598417, 795.6766283152026, 66.54618585652794], 
"eval_len": [325, 105, 1000, 81, 1000, 119, 385, 274, 721, 69]}

  6%|▌         | 59999/1000000 [1:00:30<12:31:11, 20.86it/s]global step 60000, trans_decision ep_re 245.37856628844997

{"global_step": 60000, "eval_re": [50.54906665299718, 846.0146089927302, 
208.9530582489953, 89.89816042534594, 149.38666994184626, 176.87055891419007, 
115.13682593503174, 135.98253659933422, 628.1919153424525, 52.802261831576075], 
"eval_len": [58, 875, 261, 75, 136, 139, 79, 132, 1000, 58]}

  7%|▋         | 69999/1000000 [1:11:20<12:14:05, 21.11it/s]global step 70000, trans_decision ep_re 432.87546238781

{"global_step": 70000, "eval_re": [1035.9361116372756, 750.9704249069691, 
69.3238171236889, 144.61887058875948, 425.7189419623803, 105.90533216766296, 
302.2401245327435, 422.9778328336213, 141.08421233116536, 929.9789557938334], 
"eval_len": [890, 1000, 68, 152, 288, 95, 240, 362, 182, 865]}

  8%|▊         | 79999/1000000 [1:22:10<12:20:00, 20.72it/s]global step 80000, trans_decision ep_re 246.47406759160896

{"global_step": 80000, "eval_re": [100.02806988872062, 50.10671169777071, 
601.6072013175306, 403.44203144911376, 142.73285122805808, 80.25112006072723, 
120.63132988435761, 658.8571230605423, 131.1841681052325, 175.9000692240365], 
"eval_len": [99, 60, 1000, 274, 154, 59, 100, 1000, 102, 304]}

  9%|▉         | 89999/1000000 [1:32:50<11:59:00, 21.09it/s]global step 90000, trans_decision ep_re 280.36271334594437

{"global_step": 90000, "eval_re": [67.13656609142491, 482.52771011694864, 
614.0846540571632, 74.09345863581599, 440.71221087223194, 128.31875280359017, 
149.53652068998758, 266.9278516537191, 348.77522553180887, 231.51418300675303], 
"eval_len": [66, 1000, 1000, 63, 385, 101, 110, 216, 303, 175]}

 10%|▉         | 99998/1000000 [1:43:40<11:58:56, 20.86it/s]global step 100000, trans_decision ep_re 555.8401023997269

{"global_step": 100000, "eval_re": [902.8094908516201, 125.45992839860932, 
821.3269628981187, 725.6151397068835, 109.87692967970001, 1138.154549388088, 
37.28988868113848, 852.9619157763611, 760.6161384251561, 84.29008019159417], 
"eval_len": [1000, 222, 1000, 1000, 124, 1000, 43, 1000, 1000, 72]}

 11%|█         | 109998/1000000 [1:54:40<11:30:44, 21.47it/s]global step 110000, trans_decision ep_re 457.1170392602911

{"global_step": 110000, "eval_re": [145.34050444722033, 555.2487185332222, 
32.77737583681347, 640.6634067504307, 951.354559288399, 74.25352625042711, 
960.8777685929101, 113.35900354276181, 508.13313559985943, 589.1623937608667], 
"eval_len": [96, 1000, 54, 1000, 875, 107, 1000, 133, 627, 1000]}

 12%|█▏        | 119999/1000000 [2:05:40<11:53:07, 20.57it/s]global step 120000, trans_decision ep_re 735.0800753540611

{"global_step": 120000, "eval_re": [235.8905366584168, 1245.369218892426, 
301.29742365780714, 762.2653175739937, 125.8687793132928, 93.43562501600046, 
1225.456247609343, 1196.7877591284164, 1252.339158395475, 912.090687295441], 
"eval_len": [236, 1000, 224, 627, 103, 73, 1000, 1000, 839, 827]}

 13%|█▎        | 129999/1000000 [2:16:40<11:32:19, 20.94it/s]global step 130000, trans_decision ep_re 421.17883495851993

{"global_step": 130000, "eval_re": [118.45172189515446, 186.64940489480506, 
652.4139518334392, 212.54220141858596, 1022.1937291870629, 600.3131880709655, 
42.646331671579766, 303.5632707763308, 76.48622928394764, 996.5283205533284], 
"eval_len": [89, 231, 1000, 161, 1000, 491, 43, 341, 64, 819]}

 14%|█▍        | 139998/1000000 [2:27:30<11:14:10, 21.26it/s]global step 140000, trans_decision ep_re 680.8554779781701

{"global_step": 140000, "eval_re": [272.3537368206691, 163.60867328886079, 
1583.0486884863546, 1385.3392704574937, 908.53464942393, 184.5678656886006, 
1371.7991467036345, 35.233437847247046, 398.1553826899164, 505.91392837499575], 
"eval_len": [260, 123, 1000, 1000, 569, 155, 1000, 46, 241, 437]}

 15%|█▍        | 149999/1000000 [2:38:20<11:24:02, 20.71it/s]global step 150000, trans_decision ep_re 702.1258705637566

{"global_step": 150000, "eval_re": [152.27886984177889, 744.2859942086594, 
308.7919215822292, 1037.9326777102938, 291.4360786700788, 1347.5371610805564, 
667.0490500750918, 272.717062300333, 1411.3977037383836, 787.8321864301607], 
"eval_len": [152, 1000, 211, 819, 247, 997, 558, 169, 882, 1000]}

 16%|█▌        | 159999/1000000 [2:49:20<11:23:40, 20.48it/s]global step 160000, trans_decision ep_re 418.5547016107181

{"global_step": 160000, "eval_re": [128.89106889664495, 887.4006237934672, 
33.368111080887594, 128.34015609591017, 620.4058691902087, 370.3589051383961, 
638.1445898276093, 951.8480936613126, 322.3652815214064, 104.42431690133779], 
"eval_len": [90, 1000, 44, 86, 382, 249, 431, 1000, 313, 73]}

 17%|█▋        | 169999/1000000 [3:00:10<10:46:10, 21.41it/s]global step 170000, trans_decision ep_re 390.20989017054086

{"global_step": 170000, "eval_re": [31.07355625378302, 767.1001236347112, 
848.9718107342126, 303.13441331884206, 801.6589181493651, 211.02996162948068, 
489.1643726021983, 81.96630378240968, 195.27525389963378, 172.72418770077226], 
"eval_len": [44, 592, 504, 303, 1000, 170, 452, 96, 121, 110]}

 18%|█▊        | 179998/1000000 [3:10:50<10:51:50, 20.97it/s]global step 180000, trans_decision ep_re 513.6068112429984

{"global_step": 180000, "eval_re": [1207.4363804805716, 42.52740909369868, 
415.29588641823557, 850.3185014516245, 291.24145061148226, 1110.3895745806199, 
406.11017646674344, 54.18509823507027, 127.88891023598258, 630.6747248559561], 
"eval_len": [1000, 44, 322, 1000, 157, 1000, 224, 54, 89, 453]}

 19%|█▉        | 189998/1000000 [3:21:40<10:33:43, 21.30it/s]global step 190000, trans_decision ep_re 413.2016494874921

{"global_step": 190000, "eval_re": [684.1290498086022, 913.4719142450699, 
80.29157295922674, 45.79466054879146, 493.76877900862496, 595.0272223382528, 
76.88738044997271, 177.10209996884066, 728.071248729676, 337.4725668178629], 
"eval_len": [1000, 1000, 132, 48, 349, 1000, 76, 148, 1000, 256]}

 20%|█▉        | 199999/1000000 [3:32:40<10:34:38, 21.01it/s]global step 200000, trans_decision ep_re 454.2676311642589

{"global_step": 200000, "eval_re": [76.66540411996722, 525.6521422322289, 
89.55997812150983, 1091.1576267323794, 597.5933723355248, 173.1994376663088, 
161.584419583159, 333.88628158262736, 1196.2117779899247, 297.1658712789595], 
"eval_len": [80, 490, 80, 719, 1000, 133, 108, 195, 1000, 265]}

 21%|██        | 209999/1000000 [3:43:30<10:23:59, 21.10it/s]global step 210000, trans_decision ep_re 980.1019099159394

{"global_step": 210000, "eval_re": [1519.7910594267628, 1473.7543924224053, 
987.7396195838152, 1037.1620381203504, 782.0275928300704, 132.25855058485752, 
1198.4702047330122, 961.1620411542649, 377.71543119506975, 1330.938169108786], 
"eval_len": [1000, 1000, 1000, 667, 1000, 126, 1000, 624, 275, 885]}

 22%|██▏       | 219998/1000000 [3:54:30<10:07:48, 21.39it/s]global step 220000, trans_decision ep_re 331.78043822284656

{"global_step": 220000, "eval_re": [30.728949720215063, 315.8829865629096, 
79.5196402465523, 1132.8581339414077, 111.67296991803403, 542.0671916886912, 
116.7699133861188, 29.45356559766146, 720.3501842589747, 238.50084690790075], 
"eval_len": [54, 1000, 76, 1000, 76, 313, 100, 61, 488, 165]}

 23%|██▎       | 229998/1000000 [4:05:20<10:18:30, 20.75it/s]global step 230000, trans_decision ep_re 361.268959263975

{"global_step": 230000, "eval_re": [117.31581401456835, 124.51414367876748, 
278.0669116252267, 151.6089707867982, 376.77508901013255, 180.90120768855422, 
981.1037317702367, 646.5577647885989, 90.46364526292857, 665.3823140139385], 
"eval_len": [132, 88, 226, 99, 300, 114, 1000, 470, 176, 531]}

 24%|██▍       | 239998/1000000 [4:16:00<9:57:03, 21.22it/s]global step 240000, trans_decision ep_re 734.3694879060989

{"global_step": 240000, "eval_re": [940.1112590021016, 1367.8686231342429, 
543.7486978948383, 295.3051516535798, 281.9721879273126, 597.197603850516, 
912.0203544638335, 74.1952303483045, 1673.717572508513, 657.5581982777477], 
"eval_len": [519, 932, 358, 209, 200, 385, 511, 71, 1000, 487]}

 25%|██▍       | 249999/1000000 [4:27:00<9:51:33, 21.13it/s]global step 250000, trans_decision ep_re 380.6115995062604

{"global_step": 250000, "eval_re": [1382.034497702655, 98.9671112731317, 
112.07598934153596, 159.1819654014758, 177.6528744086444, 438.91257073609756, 
64.74016642865749, 949.69490530598, 350.24735234583125, 72.608562118595], 
"eval_len": [907, 88, 72, 137, 196, 373, 61, 1000, 247, 58]}

 26%|██▌       | 259999/1000000 [4:37:50<9:51:06, 20.87it/s]global step 260000, trans_decision ep_re 963.8565201177365

{"global_step": 260000, "eval_re": [541.6524362085192, 1267.130340231615, 
1316.211204714206, 219.67818298877592, 1577.2037201963537, 166.28489592237025, 
565.7986318006509, 1300.38926897878, 1373.2771475696363, 1310.9393725664575], 
"eval_len": [361, 1000, 1000, 153, 1000, 1000, 443, 880, 1000, 1000]}

 27%|██▋       | 269997/1000000 [4:48:50<9:32:04, 21.27it/s]global step 270000, trans_decision ep_re 300.42943686187465

{"global_step": 270000, "eval_re": [622.587024517787, 432.09665949758477, 
315.01309379541317, 132.83500756212246, 192.72528687002094, 433.8533252492377, 
237.44186364985708, 182.5779657912715, 172.86224459454888, 282.30189709090314], 
"eval_len": [1000, 269, 1000, 81, 203, 294, 154, 207, 229, 183]}

 28%|██▊       | 279999/1000000 [4:59:30<9:38:36, 20.74it/s]global step 280000, trans_decision ep_re -27.549678705973815

{"global_step": 280000, "eval_re": [45.86694436626663, -184.00904236257568, 
27.367261596202184, 617.798578582466, -157.39806077478096, -379.72755938190045, 
-138.93236621967654, -75.70988309197686, 61.394846466570975, -92.1475062403335],
"eval_len": [88, 1000, 46, 1000, 1000, 1000, 1000, 1000, 265, 1000]}

 29%|██▉       | 289998/1000000 [5:10:30<9:19:50, 21.14it/s]global step 290000, trans_decision ep_re 498.7035888984369

{"global_step": 290000, "eval_re": [604.3477463535297, 68.44302286263719, 
183.390511742069, 705.8834917072603, 538.2488504285857, 501.2183150268344, 
1253.039306325978, 400.1918569734373, 467.2983776727159, 264.9744098913213], 
"eval_len": [1000, 67, 1000, 1000, 1000, 1000, 986, 292, 410, 1000]}

 30%|██▉       | 299998/1000000 [5:21:30<9:01:36, 21.54it/s]global step 300000, trans_decision ep_re 495.03484548811855

{"global_step": 300000, "eval_re": [40.55214017650756, 853.6784162936812, 
753.3881928955996, 745.7435073830083, 109.09725223824461, 324.6931053554287, 
326.2737174998326, 1218.3028115422828, 484.7734631979662, 93.8458482986331], 
"eval_len": [64, 1000, 499, 537, 77, 170, 258, 697, 393, 69]}

 31%|███       | 309999/1000000 [5:32:10<9:08:20, 20.97it/s]global step 310000, trans_decision ep_re 426.1209478247118

{"global_step": 310000, "eval_re": [367.4914081452593, 97.9331518624544, 
411.21573020127715, 265.0336381628604, 260.2506944729736, 772.4677122376834, 
156.1645801456384, 643.9155709037391, 1019.8964952307117, 266.8404968845208], 
"eval_len": [289, 76, 326, 148, 172, 1000, 92, 434, 1000, 196]}

 32%|███▏      | 319997/1000000 [5:43:00<8:55:49, 21.15it/s]global step 320000, trans_decision ep_re 1090.3732364261589

{"global_step": 320000, "eval_re": [1182.1041589228503, 859.257403340029, 
1584.619838131292, 1099.2818411484577, 1250.351739963744, 1117.8375000538224, 
1411.8825530224824, 927.9180957042876, 1230.6599864914417, 239.81924748318036], 
"eval_len": [1000, 1000, 1000, 1000, 811, 734, 795, 517, 1000, 160]}

 33%|███▎      | 329999/1000000 [5:54:00<8:58:00, 20.76it/s]global step 330000, trans_decision ep_re 655.3658942344402

{"global_step": 330000, "eval_re": [1170.1666703750288, 641.8442334713554, 
250.0600685188672, 817.4874160698505, 604.072408237686, 165.72569831471927, 
70.58246923540256, 1004.8897649305711, 823.182887077952, 1005.64732611297], 
"eval_len": [1000, 447, 142, 480, 451, 114, 103, 660, 666, 1000]}

 34%|███▍      | 339999/1000000 [6:05:00<8:43:21, 21.02it/s]global step 340000, trans_decision ep_re 485.5740262719349

{"global_step": 340000, "eval_re": [867.467322329062, 423.5083573043202, 
147.59950731373206, 306.2618908873613, 98.56752820399609, 909.2909646766557, 
163.30341956010616, 247.96532690273338, 216.27910482801204, 1475.4968407133695],
"eval_len": [653, 300, 128, 189, 93, 690, 97, 172, 154, 1000]}

 35%|███▍      | 349997/1000000 [6:15:40<8:35:48, 21.00it/s]global step 350000, trans_decision ep_re 744.1812012449192

{"global_step": 350000, "eval_re": [507.29959856078136, 1300.937148582157, 
696.6951379658417, 339.05653037117247, 266.9334302435063, 592.5165196819082, 
1294.1699001755019, 877.720634912563, 556.0866078423385, 1010.3965041134206], 
"eval_len": [362, 1000, 1000, 207, 212, 570, 895, 1000, 409, 1000]}

 36%|███▌      | 359999/1000000 [6:26:30<8:33:22, 20.78it/s]global step 360000, trans_decision ep_re 461.442227120168

{"global_step": 360000, "eval_re": [685.5272881396883, 281.6893321579952, 
107.59341852883723, 139.52187187606523, 856.812501199549, 396.9826187934242, 
767.3819604858322, 147.93791873535534, 508.2289329858261, 722.7464282991072], 
"eval_len": [398, 162, 94, 78, 599, 383, 609, 92, 356, 421]}

 37%|███▋      | 369999/1000000 [6:37:10<8:27:12, 20.70it/s]global step 370000, trans_decision ep_re 810.1270696312076

{"global_step": 370000, "eval_re": [760.0110250731672, 366.2457407202546, 
148.51598778674958, 730.7095468587286, 1075.6422322093345, 1434.3640240822233, 
1213.5968230346598, 588.3778588024633, 144.62376716316217, 1639.183690581332], 
"eval_len": [1000, 309, 100, 434, 642, 1000, 764, 354, 110, 1000]}

 38%|███▊      | 379997/1000000 [6:48:00<8:01:01, 21.48it/s]global step 380000, trans_decision ep_re 539.97419435425

{"global_step": 380000, "eval_re": [1546.9653314873062, 214.15972609752055, 
1185.2620901646403, 792.3659230787615, 406.93238275757443, 561.3810354013852, 
100.83617734051444, 72.54823338480747, 133.88167196043173, 385.4093718695572], 
"eval_len": [840, 139, 721, 410, 243, 374, 153, 64, 96, 328]}

 39%|███▉      | 389998/1000000 [6:58:40<7:52:57, 21.50it/s]global step 390000, trans_decision ep_re 855.4401778885258

{"global_step": 390000, "eval_re": [1317.8176925043338, 353.8506778589788, 
39.115466891530296, 742.0125616713455, 907.9537401107146, 1173.779959173702, 
1250.4012408876288, 968.1017658125111, 383.0514114435972, 1418.3172625309164], 
"eval_len": [905, 173, 68, 1000, 1000, 1000, 819, 1000, 254, 1000]}

 40%|███▉      | 399999/1000000 [7:09:30<7:43:09, 21.59it/s]global step 400000, trans_decision ep_re 775.7673389421287

{"global_step": 400000, "eval_re": [348.55816532707235, 748.6780669365962, 
1348.0703537906759, 1350.0533087745266, 1323.5547965964377, 1068.7724456948417, 
670.0534847543878, 111.30796990141815, 21.891077847187827, 766.7337197981426], 
"eval_len": [244, 1000, 1000, 1000, 873, 1000, 601, 65, 24, 1000]}

 41%|████      | 409997/1000000 [7:20:20<7:37:48, 21.48it/s]global step 410000, trans_decision ep_re 647.2748144758945

{"global_step": 410000, "eval_re": [329.15704802260296, 124.87320350283204, 
1053.9690428775048, 597.8614998730698, 600.0496171243998, 805.853278104651, 
820.7988387129072, 832.3671610235301, 834.9662118187033, 472.8522436987429], 
"eval_len": [229, 85, 572, 418, 398, 1000, 1000, 491, 1000, 373]}

 42%|████▏     | 419999/1000000 [7:31:10<7:23:17, 21.81it/s]global step 420000, trans_decision ep_re 1031.7684855817224

{"global_step": 420000, "eval_re": [950.6927878053327, 1230.508502621653, 
1336.0869000537505, 664.051806944572, 191.4625001060237, 1300.435845274996, 
1137.6129908033038, 1354.7227309926484, 949.8512528129687, 1202.2595384019742], 
"eval_len": [705, 1000, 1000, 1000, 142, 768, 939, 1000, 1000, 1000]}

 43%|████▎     | 429997/1000000 [7:42:00<7:24:32, 21.37it/s]global step 430000, trans_decision ep_re 655.9021806431293

{"global_step": 430000, "eval_re": [194.5347743515266, 500.2716064310173, 
892.2842235385142, 772.6501653287725, 329.7947219490952, 1389.7264459850953, 
1091.2018032038375, 103.91843086645382, 407.69927979731096, 876.9403549796709], 
"eval_len": [148, 353, 540, 1000, 251, 1000, 706, 70, 315, 535]}

 44%|████▍     | 439999/1000000 [7:52:40<7:13:18, 21.54it/s]global step 440000, trans_decision ep_re 769.5208531381971

{"global_step": 440000, "eval_re": [147.3057998028913, 1335.1465371771505, 
79.9632523827172, 647.2224482589528, 110.54352328151963, 760.0152000448029, 
1151.0849349499465, 301.84439281308306, 1571.1770242509206, 1590.9054184199865],
"eval_len": [134, 1000, 73, 391, 94, 541, 641, 226, 1000, 968]}

 45%|████▍     | 449999/1000000 [8:03:30<7:02:34, 21.69it/s]global step 450000, trans_decision ep_re 1167.2414125788614

{"global_step": 450000, "eval_re": [1443.287411612969, 1567.41677051519, 
1492.1217178347274, 872.5649774648632, 1760.9713352410158, 731.0107498848444, 
423.1654407415765, 773.4181568019201, 792.6853665613635, 1815.772199130142], 
"eval_len": [1000, 1000, 1000, 527, 993, 524, 237, 461, 494, 1000]}

 46%|████▌     | 459999/1000000 [8:14:20<6:56:07, 21.63it/s]global step 460000, trans_decision ep_re 539.5637852542802

{"global_step": 460000, "eval_re": [544.0379017418688, 440.47650206011764, 
331.1415686316823, 307.96496836731563, 968.3023479006846, 1238.0685591473443, 
713.2948709901733, 481.7407559268228, 83.93715872502591, 286.6732190517663], 
"eval_len": [398, 382, 250, 298, 644, 1000, 509, 273, 68, 210]}

 47%|████▋     | 469997/1000000 [8:24:50<6:56:41, 21.20it/s]global step 470000, trans_decision ep_re 583.9824154479829

{"global_step": 470000, "eval_re": [175.4504783154198, 1505.2384579146544, 
291.81300609570434, 233.72990404723723, 398.6236663692981, 525.022908375974, 
889.6227621559091, 380.96330508018735, 1333.7449252827807, 105.61474084266382], 
"eval_len": [132, 1000, 177, 155, 320, 405, 610, 229, 1000, 104]}

 48%|████▊     | 479997/1000000 [8:35:30<6:42:38, 21.52it/s]global step 480000, trans_decision ep_re 529.5519453740733

{"global_step": 480000, "eval_re": [845.8136323019773, 579.1019860534403, 
265.0082208668175, 765.2387057400409, 277.32947567980807, 986.7711596443402, 
161.17112478182253, 677.7143963819228, 401.5182936675161, 335.85245862304583], 
"eval_len": [612, 363, 250, 1000, 166, 1000, 106, 468, 243, 202]}

 49%|████▉     | 489998/1000000 [8:46:10<6:34:04, 21.57it/s]global step 490000, trans_decision ep_re 352.7918228717693

{"global_step": 490000, "eval_re": [116.62833187166231, 113.762771976564, 
859.0059359233151, 113.86374967861195, 1028.4791994717566, 13.841438318262316, 
190.44826087226335, 436.53786778572606, 204.44205944728168, 450.90861337224914],
"eval_len": [101, 92, 1000, 76, 1000, 12, 242, 272, 131, 377]}

 50%|████▉     | 499999/1000000 [8:56:40<6:29:48, 21.38it/s]global step 500000, trans_decision ep_re 726.9783590428516

{"global_step": 500000, "eval_re": [584.2901065014754, 1260.6318142937128, 
778.2829056117257, 1129.110126023306, 1094.0447636444665, 88.04084150818233, 
233.77314050041855, 833.8427031386002, 699.7798973250395, 567.9872918815892], 
"eval_len": [1000, 1000, 441, 1000, 1000, 61, 144, 551, 428, 329]}

 51%|█████     | 509999/1000000 [9:07:20<6:21:06, 21.43it/s]global step 510000, trans_decision ep_re 1047.5433744385748

{"global_step": 510000, "eval_re": [161.90187251180134, 1580.5962016370506, 
220.1030421535095, 1455.5007185891277, 1546.9336378669304, 1644.3902153023785, 
1002.1872794504327, 1476.509660136756, 900.8481251433661, 486.4629915943958], 
"eval_len": [120, 1000, 185, 1000, 1000, 1000, 645, 1000, 612, 367]}

 52%|█████▏    | 519999/1000000 [9:18:10<6:08:22, 21.72it/s]global step 520000, trans_decision ep_re 843.0869797932295

{"global_step": 520000, "eval_re": [448.06018253331644, 535.9777235542479, 
860.6393401778265, 1243.8961750271078, 1132.7414104982688, 1154.5876679147134, 
1382.2426833915135, 448.055397613924, 436.9472732608948, 787.7219439604828], 
"eval_len": [261, 1000, 610, 896, 624, 1000, 800, 268, 318, 508]}

 53%|█████▎    | 529998/1000000 [9:29:00<6:09:06, 21.22it/s]global step 530000, trans_decision ep_re 661.5028752906235

{"global_step": 530000, "eval_re": [1168.8737578530952, 503.14484817712537, 
1706.6074017664055, 981.5026390383699, 708.826384791365, 178.6608861380691, 
91.06591332377967, 207.59986771030148, 834.9463435751426, 233.8007105325817], 
"eval_len": [686, 332, 1000, 1000, 1000, 146, 70, 111, 1000, 1000]}

 54%|█████▍    | 539999/1000000 [9:39:40<5:58:16, 21.40it/s]global step 540000, trans_decision ep_re 458.7845572749211

{"global_step": 540000, "eval_re": [159.2685177885015, 668.4530133524415, 
521.412830759002, 216.88759622984023, 398.75748055042504, 282.1774653180346, 
1574.6601442328435, 115.03565916935048, 221.026265795697, 430.1665995530745], 
"eval_len": [150, 413, 1000, 133, 251, 218, 1000, 80, 195, 238]}

 55%|█████▍    | 549999/1000000 [9:50:20<5:48:13, 21.54it/s]global step 550000, trans_decision ep_re 651.3406700381785

{"global_step": 550000, "eval_re": [1393.644436729639, 894.8472283465416, 
39.63691575056863, 132.53800187836558, 142.99805775795238, 944.0530447530523, 
1179.4583279211968, 478.35200964771525, 548.2220965067104, 759.6565810900419], 
"eval_len": [1000, 1000, 65, 129, 109, 1000, 647, 320, 1000, 483]}

 56%|█████▌    | 559999/1000000 [10:01:10<5:42:34, 21.41it/s]global step 560000, trans_decision ep_re 804.7823362052172

{"global_step": 560000, "eval_re": [406.5284265946361, 1313.7044138523893, 
994.1088975778271, 433.4374226025003, 668.7184503463496, 1456.43086826889, 
533.5454572734768, 516.1804507971393, 311.30351255487415, 1413.86546218409], 
"eval_len": [312, 832, 646, 337, 468, 1000, 1000, 384, 218, 1000]}

 57%|█████▋    | 569999/1000000 [10:11:50<5:32:27, 21.56it/s]global step 570000, trans_decision ep_re 1041.7426783300982

{"global_step": 570000, "eval_re": [669.1016577888399, 1467.3463636709723, 
421.3206529822797, 1578.1380045742128, 1696.8120574612703, 890.7557056639514, 
672.6949368355824, 1613.7745606971596, 1174.7529828091074, 232.72986081760382], 
"eval_len": [535, 1000, 284, 1000, 1000, 664, 486, 1000, 737, 183]}

 58%|█████▊    | 579998/1000000 [10:22:40<5:25:52, 21.48it/s]global step 580000, trans_decision ep_re 574.236567283674

{"global_step": 580000, "eval_re": [461.43305719197366, 336.39667710349084, 
787.0328132279253, 286.70210315494586, 537.1574526061845, 275.4540157722522, 
694.8636785923211, 705.7396229534735, 790.7485529517522, 866.8376992824209], 
"eval_len": [284, 197, 1000, 207, 372, 184, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589999/1000000 [10:33:20<5:14:56, 21.70it/s]global step 590000, trans_decision ep_re 628.3295573524553

{"global_step": 590000, "eval_re": [1312.5071514398182, 460.8178369061348, 
129.12839385288368, 489.9763328196656, 78.13818617126812, 820.4181663927842, 
460.0945555126141, 1524.9023610236088, 830.0521876251516, 177.26040178062343], 
"eval_len": [929, 305, 103, 338, 75, 551, 336, 1000, 543, 125]}

 60%|█████▉    | 599999/1000000 [10:44:00<5:09:36, 21.53it/s]global step 600000, trans_decision ep_re 513.0831714129297

{"global_step": 600000, "eval_re": [872.9298582835631, 1604.2199810515658, 
1067.718698162616, 70.4343571807481, 519.4391661404272, 139.791195731747, 
503.22305369307526, 133.08252700454847, 170.15336488359375, 49.83951199741077], 
"eval_len": [638, 1000, 653, 68, 320, 106, 364, 94, 111, 47]}

 61%|██████    | 609999/1000000 [10:54:40<4:59:44, 21.69it/s]global step 610000, trans_decision ep_re 940.5477785247083

{"global_step": 610000, "eval_re": [1196.9979208546122, 833.256647939752, 
867.8575751076223, 1506.6398100168633, 1440.8647683836405, 419.4567340051657, 
996.4981615341071, 1234.7478811141787, 46.8604563115484, 862.2978299795939], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 282, 690, 814, 50, 525]}

 62%|██████▏   | 619999/1000000 [11:05:20<4:51:23, 21.73it/s]global step 620000, trans_decision ep_re 447.8407533637375

{"global_step": 620000, "eval_re": [431.1150010206945, 329.4293507916844, 
363.9630094621219, 520.1919315617541, 847.268411506281, 61.443687610550384, 
450.4081705755534, 647.6184939575642, 415.32630922968474, 411.64316792148674], 
"eval_len": [265, 215, 260, 309, 511, 62, 1000, 1000, 245, 267]}

 63%|██████▎   | 629999/1000000 [11:16:00<4:44:51, 21.65it/s]global step 630000, trans_decision ep_re 617.4408087469185

{"global_step": 630000, "eval_re": [622.6731520069077, 66.02641611770485, 
757.0405627770465, 811.2268814968342, 412.4491266793119, 1509.1728475839595, 
168.90243513446245, 236.97348313242603, 1160.4926521340558, 429.45053040647605],
"eval_len": [341, 59, 1000, 640, 236, 1000, 146, 143, 1000, 275]}

 64%|██████▍   | 639997/1000000 [11:26:40<4:39:26, 21.47it/s]global step 640000, trans_decision ep_re 622.111089796868

{"global_step": 640000, "eval_re": [657.7914310148848, 1666.788072078739, 
77.46802107150923, 422.343076839252, 624.7324832596746, 351.71204525238653, 
90.89838579778666, 481.5173181812007, 689.7495498873619, 1158.1105145858842], 
"eval_len": [403, 1000, 114, 267, 363, 241, 112, 464, 1000, 664]}

 65%|██████▍   | 649998/1000000 [11:37:20<4:31:53, 21.45it/s]global step 650000, trans_decision ep_re 461.41449917220797

{"global_step": 650000, "eval_re": [351.5207944747482, 841.5719496307787, 
142.80152813979777, 846.2132353399271, 110.35103280098974, 951.5976141323357, 
816.2568692115678, 89.89097635573837, 319.74229166491494, 144.19869997128131], 
"eval_len": [288, 1000, 120, 1000, 91, 702, 1000, 73, 252, 106]}

 66%|██████▌   | 659998/1000000 [11:48:00<4:25:01, 21.38it/s]global step 660000, trans_decision ep_re 518.87571085684

{"global_step": 660000, "eval_re": [172.92281274406642, 1251.744180723352, 
1382.5776228169914, 473.6700992458622, 202.34185241839467, 40.81431729469673, 
864.2727527219103, 632.8540315485518, 39.52797604076718, 128.0314630138071], 
"eval_len": [109, 897, 965, 392, 110, 46, 641, 382, 45, 89]}

 67%|██████▋   | 669999/1000000 [11:58:30<4:16:38, 21.43it/s]global step 670000, trans_decision ep_re 700.569660635105

{"global_step": 670000, "eval_re": [181.370276215034, 159.3148997488914, 
731.2091924441114, 1240.7204561404567, 967.8752790041515, 1191.4761521642902, 
104.60430735530853, 626.4837120093312, 1226.7811252521728, 575.8612060173027], 
"eval_len": [142, 129, 465, 918, 1000, 1000, 72, 465, 778, 368]}

 68%|██████▊   | 679999/1000000 [12:09:20<4:09:42, 21.36it/s]global step 680000, trans_decision ep_re 1143.180477684891

{"global_step": 680000, "eval_re": [183.23411333205127, 1483.085571105224, 
1244.120741980761, 1504.4709977380614, 740.7875206478283, 1567.0663779238287, 
147.8282147263186, 1629.976156591146, 1470.8130707668686, 1460.4220120368238], 
"eval_len": [158, 1000, 856, 1000, 528, 1000, 99, 1000, 1000, 1000]}

 69%|██████▉   | 689999/1000000 [12:20:10<4:01:36, 21.38it/s]global step 690000, trans_decision ep_re 793.9418189785678

{"global_step": 690000, "eval_re": [494.5490370091434, 364.38356346429924, 
888.9723123118911, 372.912626540458, 1409.8245685948684, 70.30350592443357, 
385.9848384507414, 1399.9372941393017, 1390.2942316994656, 1162.256211651075], 
"eval_len": [309, 270, 690, 318, 1000, 65, 291, 1000, 1000, 801]}

 70%|██████▉   | 699999/1000000 [12:30:50<3:51:26, 21.60it/s]global step 700000, trans_decision ep_re 837.9809249221416

{"global_step": 700000, "eval_re": [1222.6091741033792, 912.7870033829781, 
413.5798867674935, 894.3494788088458, 458.2980730796464, 1516.9475013705605, 
944.6277096586984, 77.82434867193923, 441.45347343848294, 1497.3325999393905], 
"eval_len": [1000, 587, 330, 1000, 307, 1000, 637, 115, 304, 1000]}

 71%|███████   | 709999/1000000 [12:41:40<3:45:06, 21.47it/s]global step 710000, trans_decision ep_re 127.99646642690838

{"global_step": 710000, "eval_re": [178.6664765582514, -43.491774688224105, 
127.46363739774834, 390.32074446808485, -212.89460527387254, 111.83649118178357,
-12.404261301519181, 463.0216568688872, 237.10624402085335, 40.34005503709088], 
"eval_len": [1000, 1000, 100, 1000, 1000, 70, 1000, 1000, 155, 55]}

 72%|███████▏  | 719999/1000000 [12:52:20<3:35:35, 21.65it/s]global step 720000, trans_decision ep_re 607.4896237290952

{"global_step": 720000, "eval_re": [1292.9136653168246, 760.0041752832284, 
248.7508324597668, 1025.6204150398892, 230.64558000289483, 587.0366542706114, 
86.09452171277219, 102.96951415987533, 832.3057010523713, 908.5551779927173], 
"eval_len": [1000, 1000, 195, 1000, 192, 1000, 87, 112, 1000, 1000]}

 73%|███████▎  | 729999/1000000 [13:03:10<3:31:43, 21.25it/s]global step 730000, trans_decision ep_re 585.1792059013875

{"global_step": 730000, "eval_re": [1100.561334215176, 660.7219074732766, 
1304.932592667112, 405.749389745307, 792.1516405165323, 186.68414376790486, 
134.92489191146814, 893.437125929026, 260.46806633067837, 112.1609664573936], 
"eval_len": [1000, 445, 924, 362, 1000, 121, 106, 569, 223, 114]}

 74%|███████▍  | 739999/1000000 [13:13:40<3:22:27, 21.40it/s]global step 740000, trans_decision ep_re 1061.456565975955

{"global_step": 740000, "eval_re": [1471.3834758971796, 1629.3239168617026, 
901.2816763250132, 148.10241614182212, 1072.0037298299903, 1482.1370411009852, 
1247.7707049040841, 1319.9186295606703, 1106.5187348183135, 236.1253343197909], 
"eval_len": [1000, 1000, 569, 103, 751, 1000, 1000, 1000, 691, 191]}

 75%|███████▍  | 749999/1000000 [13:24:30<3:13:46, 21.50it/s]global step 750000, trans_decision ep_re 830.394311925305

{"global_step": 750000, "eval_re": [1151.2230611336186, 784.9232988182001, 
1084.8837292346266, 1558.0472886823798, 294.16995741007895, 1329.6444507806427, 
219.14464393251737, 1368.0061657178035, 322.3890590919405, 191.51146445124087], 
"eval_len": [1000, 1000, 792, 1000, 260, 993, 186, 1000, 206, 133]}

 76%|███████▌  | 759999/1000000 [13:35:20<3:04:44, 21.65it/s]global step 760000, trans_decision ep_re 770.0903182116588

{"global_step": 760000, "eval_re": [1042.7833056709771, 1426.319713389684, 
528.0939482405479, 700.0504678350337, 272.51021985063824, 265.20860087652375, 
795.5702445026718, 1057.270624689825, 445.679564599094, 1167.4164924615925], 
"eval_len": [1000, 976, 1000, 622, 206, 193, 605, 812, 330, 777]}

 77%|███████▋  | 769999/1000000 [13:46:00<2:58:25, 21.49it/s]global step 770000, trans_decision ep_re 846.8532343113968

{"global_step": 770000, "eval_re": [1216.5905057095858, 1143.9108060874896, 
91.78716865249706, 370.74089999290567, 1111.46106928069, 1096.5131268224425, 
1220.5962715442017, 907.1187988456604, 1241.8113431104869, 68.0023530680076], 
"eval_len": [815, 1000, 80, 249, 692, 1000, 768, 584, 1000, 75]}

 78%|███████▊  | 779999/1000000 [13:56:50<2:51:02, 21.44it/s]global step 780000, trans_decision ep_re 183.38603197065987

{"global_step": 780000, "eval_re": [179.00094879213492, -5.121462232261184, 
50.379695730492315, 26.73460577658364, 62.062322828295294, 108.06594271634353, 
588.4023052074269, 301.3869287664634, 275.8260170191489, 247.12301510197074], 
"eval_len": [152, 138, 109, 148, 130, 128, 1000, 251, 192, 278]}

 79%|███████▉  | 789999/1000000 [14:07:20<2:41:44, 21.64it/s]global step 790000, trans_decision ep_re 672.1357538595806

{"global_step": 790000, "eval_re": [171.65933842140797, 659.6097338013877, 
385.44958779842017, 1287.0371427125879, 222.24948640860453, 278.78158174981127, 
195.23203550984206, 1343.117325470305, 1107.1540583993572, 1071.0672483240821], 
"eval_len": [164, 1000, 241, 841, 171, 205, 124, 1000, 966, 619]}

 80%|███████▉  | 799997/1000000 [14:18:00<2:36:01, 21.36it/s]global step 800000, trans_decision ep_re 838.9189527155643

{"global_step": 800000, "eval_re": [1350.233727721223, 925.0495440630783, 
1433.4134583522748, 526.9775789348381, 1225.5270008301227, 1296.896710474648, 
549.8887760780475, 167.66248787795845, 574.8353092372683, 338.70493358618364], 
"eval_len": [1000, 1000, 1000, 360, 875, 953, 378, 133, 409, 215]}

 81%|████████  | 809998/1000000 [14:28:50<2:26:06, 21.67it/s]global step 810000, trans_decision ep_re 498.53232751870854

{"global_step": 810000, "eval_re": [31.608020110349102, 751.1888782216577, 
1067.774698612483, 1180.5962419161199, 505.2482855170099, 97.34035420616148, 
276.7984762017396, 306.318631295965, 69.94728128281749, 698.502407822783], 
"eval_len": [36, 463, 782, 1000, 377, 85, 169, 235, 68, 435]}

 82%|████████▏ | 819999/1000000 [14:39:30<2:19:35, 21.49it/s]global step 820000, trans_decision ep_re 906.7535680811636

{"global_step": 820000, "eval_re": [162.14044421354475, 627.3091654680317, 
1561.157568547104, 1630.8361259375129, 121.94060922958977, 1367.0319871991874, 
1563.9626341564317, 806.2428995695088, 1068.227480657333, 158.6867658333914], 
"eval_len": [142, 463, 1000, 1000, 92, 1000, 1000, 619, 739, 135]}

 83%|████████▎ | 829999/1000000 [14:50:11<2:11:58, 21.47it/s]global step 830000, trans_decision ep_re 540.4195841920794

{"global_step": 830000, "eval_re": [312.55499253384585, 89.87419043842766, 
1469.5395253120805, 99.41229431867886, 335.03572140007145, 426.69048776897506, 
1116.9732322774157, 224.796520301681, 190.19805198625568, 1139.1208255833617], 
"eval_len": [236, 99, 1000, 103, 189, 301, 707, 1000, 124, 725]}

 84%|████████▍ | 839998/1000000 [15:00:51<2:00:51, 22.06it/s]global step 840000, trans_decision ep_re 659.1807190191784

{"global_step": 840000, "eval_re": [662.7998894806617, 1003.7658814852633, 
456.02092636857, 163.35487528913808, 431.85740166839764, 422.0937278178307, 
881.0429400175592, 1437.3604350455837, 421.13805737024495, 712.3730556485341], 
"eval_len": [534, 1000, 361, 113, 243, 285, 650, 912, 275, 1000]}

 85%|████████▍ | 849998/1000000 [15:11:31<1:54:39, 21.80it/s]global step 850000, trans_decision ep_re 439.8719260072362

{"global_step": 850000, "eval_re": [297.439496432041, 695.7384670252311, 
55.04280895620605, 760.3642211977498, 363.01099443680533, 753.3166947741306, 
86.09454159364805, 608.9050241376889, 140.20887969853624, 638.598131820325], 
"eval_len": [172, 433, 76, 1000, 236, 1000, 60, 447, 181, 344]}

 86%|████████▌ | 859999/1000000 [15:22:01<1:50:40, 21.08it/s]global step 860000, trans_decision ep_re 407.79560981874045

{"global_step": 860000, "eval_re": [1516.7202013833178, -449.0229231989051, 
1528.8896211122549, -1583.3176357042744, 1556.5260824385248, 218.37423559661792,
1599.9446769407878, -1336.5675921137224, 735.8696360908978, 290.5397956419058], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 161, 1000, 1000, 1000, 200]}

 87%|████████▋ | 869999/1000000 [15:33:01<1:41:36, 21.32it/s]global step 870000, trans_decision ep_re 805.7886306668516

{"global_step": 870000, "eval_re": [151.86370783789056, 668.3901833705795, 
731.8673178098077, 652.9003918150968, 1498.7583886038788, 282.3725041292945, 
1063.276581715349, 544.0512978941036, 734.4545637320477, 1729.9513697604677], 
"eval_len": [96, 1000, 1000, 395, 1000, 215, 1000, 322, 1000, 1000]}

 88%|████████▊ | 879998/1000000 [15:43:51<1:31:42, 21.81it/s]global step 880000, trans_decision ep_re 806.6608563376765

{"global_step": 880000, "eval_re": [100.09378853531729, 1523.8900944594097, 
564.4785668730602, 1357.2989688595856, 70.78360328961249, 787.3496509595583, 
558.4819328259964, 738.5025614391135, 1139.1910417326872, 1226.5383544024241], 
"eval_len": [97, 1000, 407, 1000, 86, 597, 361, 1000, 819, 733]}

 89%|████████▉ | 889997/1000000 [15:54:31<1:25:34, 21.42it/s]global step 890000, trans_decision ep_re 683.9264913281743

{"global_step": 890000, "eval_re": [281.1996903886898, 827.2712748638088, 
855.5012266473715, 1403.820556828507, 774.3984181056093, 284.6652302838942, 
457.1393230570074, 785.1912732558926, 978.8507068384446, 191.22721301251747], 
"eval_len": [120, 498, 1000, 836, 574, 166, 237, 559, 697, 136]}

 90%|████████▉ | 899997/1000000 [16:05:11<1:16:42, 21.73it/s]global step 900000, trans_decision ep_re 593.8533864633899

{"global_step": 900000, "eval_re": [32.58803520723421, 1375.7677662637573, 
56.89848864710936, 1361.9854655394568, 892.7463175449496, 560.7153352860878, 
533.0526648394672, 620.0177980926743, 413.5135827346108, 91.24841047855186], 
"eval_len": [75, 1000, 76, 1000, 698, 343, 344, 466, 319, 78]}

 91%|█████████ | 909998/1000000 [16:15:51<1:09:33, 21.57it/s]global step 910000, trans_decision ep_re 286.5963701972586

{"global_step": 910000, "eval_re": [748.016029315781, 150.3354007681735, 
296.4461272781963, 93.13537077409558, 43.34185014252935, 418.4065116670088, 
194.11652500197886, 202.60662539466426, 304.9832650561215, 414.5759965740365], 
"eval_len": [1000, 86, 190, 72, 49, 249, 146, 108, 184, 292]}

 92%|█████████▏| 919997/1000000 [16:26:21<1:02:31, 21.32it/s]global step 920000, trans_decision ep_re 418.0656213507282

{"global_step": 920000, "eval_re": [119.64493988988347, 151.47846588260688, 
93.31732621830987, 223.3325760326382, 1431.9974724195465, 1218.118393413435, 
104.98958713037953, 320.44710274628005, 113.57723882311159, 403.75311095109066],
"eval_len": [88, 107, 62, 218, 1000, 1000, 75, 259, 69, 289]}

 93%|█████████▎| 929999/1000000 [16:36:51<54:36, 21.37it/s]global step 930000, trans_decision ep_re 723.4425412315436

{"global_step": 930000, "eval_re": [1472.290200741492, 1342.8043871109867, 
463.5071861703428, 467.3010687430999, 269.32176412178114, 129.3464051210362, 
759.6562767779379, 730.2185803270867, 1465.0543072500914, 134.92523595158102], 
"eval_len": [896, 925, 352, 304, 196, 87, 556, 564, 982, 87]}

 94%|█████████▍| 939999/1000000 [16:47:41<46:09, 21.67it/s]global step 940000, trans_decision ep_re 768.3931391553325

{"global_step": 940000, "eval_re": [385.1024312250156, 207.43484132564208, 
1690.5041471820668, 1057.182604950129, 851.9415797295027, 87.14908248324882, 
786.7936653706043, 1090.184178699304, 1385.1524610450324, 142.4863995427792], 
"eval_len": [248, 238, 1000, 694, 1000, 68, 1000, 698, 1000, 88]}

 95%|█████████▍| 949999/1000000 [16:58:21<38:49, 21.46it/s]global step 950000, trans_decision ep_re 836.4205803772859

{"global_step": 950000, "eval_re": [1527.5847944593825, 1483.798932929662, 
1524.6136481581989, 111.41930493529517, 743.7672862911727, 468.2737539714097, 
330.6746647396896, 689.468093523257, 247.1020101524543, 1237.5033146123376], 
"eval_len": [1000, 1000, 1000, 93, 1000, 389, 230, 436, 158, 1000]}

 96%|█████████▌| 959998/1000000 [17:09:11<31:11, 21.38it/s]global step 960000, trans_decision ep_re 481.4069684278653

{"global_step": 960000, "eval_re": [515.877668381711, 77.42630662023419, 
132.17863007083554, 894.6126124322661, 1000.4170366172273, 796.1164062164083, 
279.35343694088135, 133.04803450630422, 735.3253247018104, 249.71422779097475], 
"eval_len": [449, 65, 77, 608, 719, 1000, 198, 96, 454, 161]}

 97%|█████████▋| 969999/1000000 [17:19:41<23:11, 21.56it/s]global step 970000, trans_decision ep_re 312.3853903323979

{"global_step": 970000, "eval_re": [899.8854271004559, 424.50606883665307, 
182.59917743714874, 46.5492423493515, 753.163423873503, 50.30813407329945, 
328.9271874176538, 202.41901587215608, 136.22247762674746, 99.27374873701007], 
"eval_len": [1000, 332, 145, 48, 499, 48, 194, 155, 129, 78]}

 98%|█████████▊| 979999/1000000 [17:30:11<15:34, 21.40it/s]global step 980000, trans_decision ep_re 414.47813597674184

{"global_step": 980000, "eval_re": [45.95250686947946, 643.3438928197304, 
220.97661160924955, 129.32045797952392, 1262.696624651484, 241.0706008586165, 
280.111694209302, 303.471713427824, 240.96326794430317, 776.873989397906], 
"eval_len": [47, 390, 187, 106, 879, 187, 176, 179, 164, 493]}

 99%|█████████▉| 989998/1000000 [17:40:51<07:41, 21.69it/s]global step 990000, trans_decision ep_re 713.8552122841882

{"global_step": 990000, "eval_re": [513.3230709801707, 167.28023235048383, 
1362.2711001262394, 1008.4845374351394, 260.05671500100766, 165.66266586360896, 
839.3397284905272, 844.9558985428564, 1112.5583188122193, 864.61985523963], 
"eval_len": [375, 153, 1000, 1000, 405, 128, 626, 1000, 881, 650]}

100%|█████████▉| 999999/1000000 [17:51:31<00:00, 21.42it/s]global step 1000000, trans_decision ep_re 936.52138437075

{"global_step": 1000000, "eval_re": [1318.3892775888496, 381.42506772819223, 
792.7264787070235, 902.5099962714206, 854.0289784367359, 1440.3314937971318, 
562.0682874210602, 1571.9371384722426, 86.05453692183762, 1455.7425883630035], 
"eval_len": [904, 278, 454, 1000, 605, 877, 398, 1000, 77, 1000]}

100%|██████████| 1000000/1000000 [17:51:59<00:00, 15.55it/s]
