
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 4,
    'delayspec': 'ExtremeSparseL4U32::markov(4, 32, [[249, 1], [1, 31]])'
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [02:40<5:34:45, 49.29it/s]global step 10000, trans_decision ep_re 704.8996061925066

{"global_step": 10000, "eval_re": [765.0838557712478, 750.3642935997491, 
814.3550494952389, 710.9676182004599, 703.4799383240356, 845.1044172941606, 
806.6123771979637, -0.2355482478176214, 816.8666169650655, 836.3974433249625], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 20, 1000, 1000]}

  2%|▏         | 19997/1000000 [07:30<5:34:33, 48.82it/s]global step 20000, trans_decision ep_re 607.9525867556611

{"global_step": 20000, "eval_re": [862.3018734892408, 760.661392877335, 
765.0232934259078, 327.3597937918453, 973.3415980301459, 231.90530823788586, 
136.40105231162494, 555.8226236069775, 816.6066496366628, 650.102282148984], 
"eval_len": [1000, 1000, 1000, 453, 1000, 264, 138, 1000, 1000, 1000]}

  3%|▎         | 29999/1000000 [12:20<5:31:01, 48.84it/s]global step 30000, trans_decision ep_re 937.9196181223035

{"global_step": 30000, "eval_re": [897.3500797155582, 910.0685976503528, 
946.1789320052402, 1003.9342611887555, 876.9041859963205, 921.4945208898139, 
953.7509303388422, 706.6883376054111, 1109.761692955454, 1053.0646428772873], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 756, 1000, 1000]}

  4%|▍         | 39997/1000000 [17:10<5:27:13, 48.90it/s]global step 40000, trans_decision ep_re 819.5826116211832

{"global_step": 40000, "eval_re": [1295.8087517463757, 131.36241525209297, 
651.3470882432844, 1312.5976882387142, 397.5934287491006, 1397.7221540203532, 
289.9180298936895, 710.7814927384102, 731.0533619288102, 1277.6417054010033], 
"eval_len": [1000, 89, 487, 1000, 371, 1000, 213, 567, 1000, 1000]}

  5%|▍         | 49997/1000000 [22:00<5:20:46, 49.36it/s]global step 50000, trans_decision ep_re 1010.1808574115179

{"global_step": 50000, "eval_re": [794.9845790923924, 1346.0197246182827, 
1026.6180351758505, 1082.5187074265787, 382.85035460558066, 1554.929013996853, 
701.149731321223, 696.1054978416047, 1101.8733705399354, 1414.7595594968789], 
"eval_len": [501, 848, 590, 648, 1000, 1000, 1000, 446, 721, 1000]}

  6%|▌         | 59999/1000000 [26:50<5:18:33, 49.18it/s]global step 60000, trans_decision ep_re 983.277295483075

{"global_step": 60000, "eval_re": [729.7336142707907, 1235.2315475256764, 
252.41891617316924, 1005.685264238575, 1189.3375569248717, 1443.8133999234255, 
1028.7309957635966, 1004.8724631015461, 1080.1649924096432, 862.7842044994561], 
"eval_len": [1000, 778, 125, 589, 815, 1000, 625, 552, 1000, 548]}

  7%|▋         | 69999/1000000 [31:40<5:15:14, 49.17it/s]global step 70000, trans_decision ep_re 782.2913808604757

{"global_step": 70000, "eval_re": [975.9540068009762, 727.6196826714388, 
142.18490143585998, 361.54634841428003, 1590.1716700045638, 574.8714714630905, 
880.4948277534513, 732.8176997109701, 786.5466735853995, 1050.7065267647258], 
"eval_len": [1000, 524, 78, 177, 992, 290, 1000, 1000, 1000, 559]}

  8%|▊         | 79999/1000000 [36:20<5:10:04, 49.45it/s]global step 80000, trans_decision ep_re 849.167219781613

{"global_step": 80000, "eval_re": [441.14554010497903, 604.8436836860786, 
467.71631964698406, 1693.1166913052466, 425.2939712435029, 293.07817830119694, 
1740.756347293438, 962.7300115964074, 736.9460890279329, 1126.0453656103643], 
"eval_len": [201, 255, 310, 1000, 219, 221, 1000, 503, 420, 569]}

  9%|▉         | 89995/1000000 [41:10<5:07:30, 49.32it/s]global step 90000, trans_decision ep_re 1359.5697800886803

{"global_step": 90000, "eval_re": [818.4048066813392, 1792.2168484478764, 
1846.2218028927875, 1844.8536013783385, 1674.347636315397, 658.157227102076, 
1641.2697292505547, 476.8935206310978, 921.9624612781598, 1921.3701669091774], 
"eval_len": [358, 1000, 1000, 1000, 1000, 522, 1000, 273, 1000, 1000]}

 10%|▉         | 99995/1000000 [46:00<5:02:38, 49.56it/s]global step 100000, trans_decision ep_re 1857.6975917839518

{"global_step": 100000, "eval_re": [2076.3599828974743, 962.5595210761811, 
989.3190147600471, 1879.185414729952, 1938.785916400262, 2120.1689226054177, 
2678.4392600250726, 1886.3321546182724, 2207.381175707252, 1838.444555019588], 
"eval_len": [1000, 399, 1000, 847, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109995/1000000 [50:50<4:59:09, 49.58it/s]global step 110000, trans_decision ep_re 1667.3451055522528

{"global_step": 110000, "eval_re": [2216.542245687635, 2083.4207249517253, 
2091.0976137278885, 2365.1222230549743, 695.6558344892098, 161.3740957028089, 
2235.4932872287895, 1940.7736621085046, 2137.464679749965, 746.5066888210266], 
"eval_len": [1000, 1000, 1000, 1000, 303, 75, 1000, 1000, 1000, 340]}

 12%|█▏        | 119995/1000000 [55:40<4:57:30, 49.30it/s]global step 120000, trans_decision ep_re 1718.4110515367472

{"global_step": 120000, "eval_re": [2531.6962778491884, 1362.8987140477777, 
2273.1607422708335, 2458.6959532083674, 2699.1178753565455, 271.416602385085, 
881.9208053716874, 58.32830761993211, 2205.551404846961, 2441.3238324110926], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 379, 36, 1000, 1000]}

 13%|█▎        | 129997/1000000 [1:00:30<4:57:12, 48.79it/s]global step 130000, trans_decision ep_re 1591.1158357726104

{"global_step": 130000, "eval_re": [2523.000850318964, 2574.122103719475, 
1550.2907736080595, 1211.898492844156, 2171.4219369168413, 1318.4842770673554, 
1867.271983449419, 2533.243208562489, 144.2126769881085, 17.21205425123447], 
"eval_len": [1000, 1000, 1000, 526, 1000, 562, 664, 1000, 70, 15]}

 14%|█▍        | 139997/1000000 [1:05:20<4:50:53, 49.27it/s]global step 140000, trans_decision ep_re 2219.5133209585165

{"global_step": 140000, "eval_re": [2963.176777911338, 2586.4981754280284, 
994.0032791836276, 2971.0194544550486, 2950.1779089913657, 2387.699458570017, 
315.3738945948065, 1424.4213756081924, 2534.240293457879, 3068.5225913848617], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 923, 121, 630, 1000, 1000]}

 15%|█▍        | 149999/1000000 [1:10:10<4:47:12, 49.33it/s]global step 150000, trans_decision ep_re 2402.195749347262

{"global_step": 150000, "eval_re": [1421.4006605076424, 2677.8311830491552, 
1823.53096543068, 1297.1653287776946, 2676.7780793959223, 2527.2548741876403, 
2969.4984440177163, 2712.3914135439163, 3133.228820426326, 2782.8777241359267], 
"eval_len": [697, 1000, 853, 561, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159997/1000000 [1:15:10<4:43:11, 49.44it/s]global step 160000, trans_decision ep_re 2190.243715134431

{"global_step": 160000, "eval_re": [2742.927542496459, 2930.3913916832366, 
2318.1350577549333, 122.83708117008922, 2732.4973894762984, 248.7048182894556, 
2567.9763198596743, 2920.6594616843227, 2734.5505406506636, 2583.7575482791785],
"eval_len": [1000, 1000, 1000, 59, 1000, 101, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169995/1000000 [1:20:00<4:40:22, 49.34it/s]global step 170000, trans_decision ep_re 1504.169122366028

{"global_step": 170000, "eval_re": [1498.8284328630941, 2677.5104086015253, 
1687.5881627338763, 379.312982334116, 2125.3632695064784, 833.7033005898953, 
2780.9805721747825, 1769.3228061631341, 910.8558791045537, 378.2254095888239], 
"eval_len": [441, 1000, 668, 138, 717, 268, 1000, 600, 1000, 724]}

 18%|█▊        | 179997/1000000 [1:24:33<4:36:17, 49.47it/s]global step 180000, trans_decision ep_re 609.1162387617221

{"global_step": 180000, "eval_re": [116.48094680935473, 419.8766301568138, 
900.2736689531458, 616.8445527545152, 645.2044458044096, 424.259591440055, 
1065.2636964143708, 74.6341662440981, 1033.8894520848335, 794.4352369556249], 
"eval_len": [48, 118, 315, 194, 193, 119, 388, 40, 323, 222]}

 19%|█▉        | 189999/1000000 [1:29:31<4:33:20, 49.39it/s]global step 190000, trans_decision ep_re 2185.344851422148

{"global_step": 190000, "eval_re": [999.3048978503064, 773.2360509388238, 
3457.329120680565, 3407.216235471366, 1617.2590914550779, 507.4473533132956, 
3619.2495316701225, 1020.9662975795566, 3044.759476848754, 3406.6804584136116], 
"eval_len": [266, 210, 1000, 1000, 495, 202, 1000, 309, 1000, 1000]}

 20%|█▉        | 199995/1000000 [1:34:11<4:30:33, 49.28it/s]global step 200000, trans_decision ep_re 2409.9357581576796

{"global_step": 200000, "eval_re": [3489.359696676454, 3628.5944493393717, 
3312.4212578012625, 710.230231315193, 3451.173037420745, 3535.3285884831316, 
143.77869820372385, 560.2832945868907, 1807.0043102791694, 3461.184017470856], 
"eval_len": [1000, 1000, 1000, 353, 1000, 1000, 59, 174, 595, 1000]}

 21%|██        | 209999/1000000 [1:39:01<4:30:13, 48.73it/s]global step 210000, trans_decision ep_re 2469.3509726152943

{"global_step": 210000, "eval_re": [3129.2812565311256, 3089.6257728697246, 
3615.219484553599, 1143.6126197746198, 712.4492123463024, 3623.6682410663516, 
3306.265994942077, 287.80745132731914, 3250.62187449726, 2534.9578182445575], 
"eval_len": [1000, 777, 1000, 284, 199, 1000, 1000, 1000, 1000, 674]}

 22%|██▏       | 219995/1000000 [1:43:51<4:24:33, 49.14it/s]global step 220000, trans_decision ep_re 2014.5666545111108

{"global_step": 220000, "eval_re": [2241.698771180806, 708.1512676188606, 
2939.691868989438, 1284.9601971553966, 833.0950990133283, 3042.37364587015, 
1268.7622001579975, 3020.735010498811, 2840.6675864718613, 1965.5308981544579], 
"eval_len": [740, 195, 1000, 1000, 356, 1000, 433, 1000, 1000, 645]}

 23%|██▎       | 229997/1000000 [1:48:42<4:19:55, 49.37it/s]global step 230000, trans_decision ep_re 2561.871138326999

{"global_step": 230000, "eval_re": [3326.7302891293575, 3660.7679615134407, 
2156.900736612456, 1092.5177630393746, 3843.4769497707894, 3208.4810782577247, 
3706.63501899069, 1271.592437700079, 799.914083954574, 2551.695064301504], 
"eval_len": [1000, 1000, 716, 1000, 1000, 1000, 1000, 348, 243, 759]}

 24%|██▍       | 239995/1000000 [1:53:32<4:15:33, 49.56it/s]global step 240000, trans_decision ep_re 1865.454160776882

{"global_step": 240000, "eval_re": [374.92850016659065, 906.3281652567752, 
3375.878584663978, 3190.8214749955864, 64.56647135119805, 1428.4633664650016, 
2877.670944160587, 723.3445171839152, 2727.0015309350483, 2985.538052590138], 
"eval_len": [132, 290, 1000, 1000, 34, 495, 882, 210, 771, 1000]}

 25%|██▍       | 249995/1000000 [1:58:22<4:13:14, 49.36it/s]global step 250000, trans_decision ep_re 2265.4432713796255

{"global_step": 250000, "eval_re": [1342.0386103386957, 1142.252980190483, 
3880.240840554403, 216.64674067506127, 1596.1361329775143, 4053.3934427574077, 
3483.9707376519113, 3808.288412643956, 1223.4065628861465, 1908.058253120675], 
"eval_len": [341, 1000, 1000, 64, 566, 1000, 1000, 1000, 393, 574]}

 26%|██▌       | 259999/1000000 [2:03:12<4:10:03, 49.32it/s]global step 260000, trans_decision ep_re 2068.463020191396

{"global_step": 260000, "eval_re": [1055.0753652726867, 1567.899587632449, 
3546.992065161022, 1281.8897714140014, 1673.1519984939873, 2859.2925753368063, 
2403.2346009185035, 206.54809409732746, 3166.446657657959, 2924.0994859292236], 
"eval_len": [316, 389, 1000, 1000, 437, 734, 1000, 69, 1000, 1000]}

 27%|██▋       | 269997/1000000 [2:08:02<4:06:14, 49.41it/s]global step 270000, trans_decision ep_re 1673.401140051764

{"global_step": 270000, "eval_re": [1054.5280495081297, 1278.6356070011775, 
584.7233567283057, 293.313718251153, 2548.2352523788177, 1017.1098672559458, 
3299.2777691351453, 1248.0033062400096, 3543.0894378215157, 1867.0950361974396],
"eval_len": [300, 349, 190, 89, 820, 486, 1000, 1000, 1000, 499]}

 28%|██▊       | 279999/1000000 [2:12:42<4:05:51, 48.81it/s]global step 280000, trans_decision ep_re 3010.8640792658866

{"global_step": 280000, "eval_re": [3171.7868289270386, 2460.135900834723, 
1564.7135206777998, 3410.886797051079, 3145.7312569183314, 3534.1784109286195, 
3702.765612234672, 3278.5737171467267, 2554.3972297138416, 3285.471518226032], 
"eval_len": [1000, 1000, 441, 1000, 1000, 1000, 1000, 876, 807, 1000]}

 29%|██▉       | 289995/1000000 [2:17:32<4:02:11, 48.86it/s]global step 290000, trans_decision ep_re 2505.8334879259996

{"global_step": 290000, "eval_re": [3797.2139378574902, 3786.07634627537, 
117.97885785631604, 2828.7929858144753, 479.42510642988873, 3506.4433656384313, 
470.74770065837254, 2944.7815939433194, 3457.5193678950573, 3669.3556168912755],
"eval_len": [1000, 1000, 66, 898, 138, 1000, 110, 715, 1000, 1000]}

 30%|██▉       | 299995/1000000 [2:22:22<3:55:33, 49.53it/s]global step 300000, trans_decision ep_re 2530.697530578133

{"global_step": 300000, "eval_re": [3660.3698050153084, 3519.5250863677215, 
756.5247074806886, 3487.679514221684, 3607.768551127029, 1395.8347040761716, 
955.4086725067446, 1693.076112999165, 3039.042304309918, 3191.7458476768984], 
"eval_len": [1000, 1000, 246, 1000, 1000, 456, 323, 464, 1000, 1000]}

 31%|███       | 309995/1000000 [2:27:12<3:53:49, 49.18it/s]global step 310000, trans_decision ep_re 2547.479359997594

{"global_step": 310000, "eval_re": [2330.1265705068595, 848.4493584844203, 
1894.5615738995969, 3227.6256765176718, 2477.3280106689713, 3729.501323977512, 
3554.073043120935, 580.355294093162, 3355.1257321488806, 3477.6470165579312], 
"eval_len": [587, 209, 577, 1000, 1000, 1000, 1000, 197, 1000, 1000]}

 32%|███▏      | 319995/1000000 [2:32:02<3:49:11, 49.45it/s]global step 320000, trans_decision ep_re 1938.5957642203957

{"global_step": 320000, "eval_re": [3659.600291709913, 580.5543011594, 
3700.9613966605734, 3365.69718124518, 648.6888918343915, 3623.7268168512346, 
1602.535353700644, 573.8207558855069, 1411.1072182280989, 219.26543492901683], 
"eval_len": [1000, 181, 1000, 1000, 202, 1000, 434, 156, 1000, 74]}

 33%|███▎      | 329997/1000000 [2:36:42<3:46:02, 49.40it/s]global step 330000, trans_decision ep_re 1959.909117765184

{"global_step": 330000, "eval_re": [4105.908531000383, 1079.0817828140673, 
1009.2926067620944, 1419.694149246272, 1529.9485272494003, 3326.788646591364, 
1027.9577922656715, 594.2984520026497, 1781.7250066533495, 3724.39568306659], 
"eval_len": [1000, 329, 334, 438, 735, 845, 274, 166, 458, 1000]}

 34%|███▍      | 339995/1000000 [2:41:32<3:43:57, 49.12it/s]global step 340000, trans_decision ep_re 2865.7451788516114

{"global_step": 340000, "eval_re": [3634.9145826124864, 3905.4009670180108, 
3716.501747704595, 3689.073184169414, 3892.7975965650576, 3683.366236388041, 
814.7793974024487, 3807.5637477499863, 1143.2578139426719, 369.7965149634024], 
"eval_len": [960, 1000, 1000, 1000, 1000, 1000, 199, 1000, 348, 122]}

 35%|███▍      | 349999/1000000 [2:46:12<3:39:30, 49.35it/s]global step 350000, trans_decision ep_re 2494.7835537321616

{"global_step": 350000, "eval_re": [1151.4135278192928, 1999.52432489172, 
1378.39221728488, 2704.5438860442464, 296.8753813429491, 3816.220714280385, 
3502.3410919940266, 2897.527515973777, 3570.2236489895495, 3630.773228700787], 
"eval_len": [358, 460, 397, 1000, 90, 1000, 1000, 791, 1000, 1000]}

 36%|███▌      | 359995/1000000 [2:51:02<3:36:05, 49.36it/s]global step 360000, trans_decision ep_re 1810.4482825672283

{"global_step": 360000, "eval_re": [4449.512985284092, 499.2255033651698, 
236.04683680938993, 942.3389848619246, 1343.4979767869195, 872.4910451476244, 
3784.42366063194, 454.07941063462624, 4178.98391902768, 1343.882503122919], 
"eval_len": [1000, 160, 69, 1000, 359, 226, 1000, 124, 1000, 317]}

 37%|███▋      | 369999/1000000 [2:55:52<3:33:44, 49.13it/s]global step 370000, trans_decision ep_re 1559.6788960162146

{"global_step": 370000, "eval_re": [769.0977876196753, 1290.422753532569, 
2983.4575653038987, 194.46059434123438, 107.3535650572012, 2314.5351846445196, 
1910.5574796225362, 793.6301513793416, 2153.4949377754465, 3079.7789408857248], 
"eval_len": [211, 1000, 1000, 59, 79, 617, 647, 1000, 618, 1000]}

 38%|███▊      | 379999/1000000 [3:00:32<3:29:15, 49.38it/s]global step 380000, trans_decision ep_re 2402.8422967370843

{"global_step": 380000, "eval_re": [3419.073503865604, 2185.3179562172418, 
4121.271400653905, 2976.262800940943, 2007.5473078104042, 525.0219708800855, 
599.5490463062976, 1750.309790975643, 2346.4153713086016, 4097.653818412116], 
"eval_len": [1000, 483, 1000, 762, 531, 134, 165, 423, 772, 1000]}

 39%|███▉      | 389999/1000000 [3:05:22<3:27:27, 49.01it/s]global step 390000, trans_decision ep_re 2542.1027314469093

{"global_step": 390000, "eval_re": [1361.1332251057859, 1059.4854734871635, 
1140.8683399841684, 4095.584494789162, 3200.7331472953124, 2942.2585676992153, 
4347.447087820797, 686.2229850982043, 3473.637034747983, 3113.656958441302], 
"eval_len": [366, 1000, 268, 1000, 1000, 674, 1000, 159, 1000, 777]}

 40%|███▉      | 399997/1000000 [3:10:02<3:24:28, 48.91it/s]global step 400000, trans_decision ep_re 3183.054849931865

{"global_step": 400000, "eval_re": [2342.726631670351, 4072.213701156729, 
3111.570543659468, 4032.9549378764673, 3342.4448048996946, 2753.807723665902, 
3585.389055761086, 477.07366179173323, 4224.8284402576055, 3887.5389985796132], 
"eval_len": [588, 1000, 799, 1000, 853, 840, 1000, 151, 1000, 1000]}

 41%|████      | 409995/1000000 [3:14:52<3:19:03, 49.40it/s]global step 410000, trans_decision ep_re 3694.213965071159

{"global_step": 410000, "eval_re": [3832.5749860045294, 4082.3374590410544, 
3078.833584949425, 2714.4718492972756, 3982.6489436003635, 4106.111850790115, 
3933.5863724485985, 3407.9876364989864, 3838.1686502025213, 3965.4183178787216],
"eval_len": [1000, 1000, 1000, 633, 982, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419997/1000000 [3:19:52<3:16:13, 49.26it/s]global step 420000, trans_decision ep_re 1701.308520290508

{"global_step": 420000, "eval_re": [1010.3707146224091, 554.7658885724331, 
4253.828417572146, 1321.910341915124, 647.3550211631748, 3911.6287268829115, 
524.1067054206273, 842.0874214147217, 2472.0626440077253, 1474.9693213338064], 
"eval_len": [248, 1000, 1000, 335, 155, 1000, 132, 1000, 720, 339]}

 43%|████▎     | 429999/1000000 [3:24:32<3:14:58, 48.72it/s]global step 430000, trans_decision ep_re 3062.1996323944254

{"global_step": 430000, "eval_re": [4094.3261054523773, 3656.5293087785008, 
4563.18432242272, 2825.875222297718, 3810.619196047255, 2354.392687982297, 
4188.879859491783, 25.555033769109524, 888.2768020697505, 4214.357785632744], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 568, 1000, 19, 240, 1000]}

 44%|████▍     | 439995/1000000 [3:29:22<3:09:58, 49.13it/s]global step 440000, trans_decision ep_re 2770.6663071052562

{"global_step": 440000, "eval_re": [3007.056261572167, 4014.900799609701, 
2866.4247837785592, 4072.531950403341, 1306.9421048504462, 2973.072091283697, 
3354.5420670704093, 3568.8717303464414, 734.700785851339, 1807.6204962864574], 
"eval_len": [758, 1000, 878, 1000, 359, 853, 1000, 1000, 179, 444]}

 45%|████▍     | 449999/1000000 [3:34:13<3:06:19, 49.20it/s]global step 450000, trans_decision ep_re 3197.166043862079

{"global_step": 450000, "eval_re": [793.8095155841324, 3386.38083052756, 
4423.454942235475, 2095.478507348145, 4244.124946516411, 3937.660691645475, 
3730.808474683053, 1125.2902381967947, 4152.083021547563, 4082.5692703361783], 
"eval_len": [184, 764, 1000, 503, 1000, 1000, 1000, 322, 1000, 1000]}

 46%|████▌     | 459995/1000000 [3:39:03<3:01:53, 49.48it/s]global step 460000, trans_decision ep_re 2715.4203054675613

{"global_step": 460000, "eval_re": [235.18246709052238, 3725.9155581157925, 
2158.026272420796, 1321.444760882145, 4000.834237342046, 720.5092194485103, 
3486.395712173157, 3743.2597859036714, 3920.0440363295916, 3842.591004969382], 
"eval_len": [73, 1000, 554, 321, 1000, 195, 801, 1000, 892, 1000]}

 47%|████▋     | 469999/1000000 [3:43:53<2:59:00, 49.35it/s]global step 470000, trans_decision ep_re 3287.8882077227295

{"global_step": 470000, "eval_re": [3882.6283620819395, 3338.520367174074, 
4325.088747403277, 4255.280090236403, 2199.5099946271853, 4050.0227361798743, 
3571.1848865531056, 332.00437433487156, 2599.571304165143, 4325.071214471424], 
"eval_len": [1000, 1000, 1000, 1000, 497, 1000, 1000, 184, 619, 1000]}

 48%|████▊     | 479997/1000000 [3:48:43<2:57:41, 48.77it/s]global step 480000, trans_decision ep_re 1920.0863779935949

{"global_step": 480000, "eval_re": [1834.7549896108594, 2027.6745387553615, 
557.3687425179322, 55.30373559278079, 2990.6349082389743, 756.137334816396, 
3105.963720724062, 2986.7065128387317, 1950.6652449184373, 2935.654051922416], 
"eval_len": [576, 1000, 262, 47, 1000, 1000, 1000, 1000, 518, 1000]}

 49%|████▉     | 489997/1000000 [3:53:33<2:52:13, 49.35it/s]global step 490000, trans_decision ep_re 3345.284311217717

{"global_step": 490000, "eval_re": [3401.6014925845884, 3919.9333222532896, 
3279.5983213313284, 3855.629594225201, 3725.9196860098355, 2543.703581957524, 
3931.841707311793, 4093.916566785694, 1143.6149966052337, 3557.0838431126886], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 807, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499995/1000000 [3:58:23<2:48:22, 49.49it/s]global step 500000, trans_decision ep_re 2099.336079422769

{"global_step": 500000, "eval_re": [2719.8826399821473, 2867.05272763104, 
3918.953660898903, 3399.4999090576075, 1759.9116020428928, 628.9877766306553, 
223.92501797361305, 390.2632048452079, 935.7729473254627, 4149.111307840165], 
"eval_len": [743, 785, 1000, 742, 459, 145, 176, 149, 259, 1000]}

 51%|█████     | 509995/1000000 [4:03:03<2:45:32, 49.33it/s]global step 510000, trans_decision ep_re 2059.5335302673957

{"global_step": 510000, "eval_re": [1931.464193212383, 1465.7490213477247, 
99.02027236408435, 1091.228222093309, 2062.2255571122714, 3326.5368085476425, 
1156.6657735266037, 3931.715894212158, 3025.117018513667, 2505.6125417441126], 
"eval_len": [489, 411, 42, 246, 1000, 845, 295, 1000, 822, 915]}

 52%|█████▏    | 519999/1000000 [4:07:53<2:43:49, 48.83it/s]global step 520000, trans_decision ep_re 2834.9901760311186

{"global_step": 520000, "eval_re": [1536.4520280318284, 814.2550174638336, 
3696.368843895457, 4038.221869692549, 4271.900824569647, 2393.2727918589353, 
1886.7818125272556, 4408.284034420332, 2002.7564385733533, 3301.6080992779953], 
"eval_len": [463, 294, 1000, 1000, 1000, 649, 541, 1000, 561, 1000]}

 53%|█████▎    | 529999/1000000 [4:12:43<2:37:41, 49.67it/s]global step 530000, trans_decision ep_re 2304.528354761919

{"global_step": 530000, "eval_re": [879.7287897095672, 3311.3242263641746, 
3827.5679656631896, 71.14287647453308, 3563.168865302008, 3701.370172395909, 
3409.1365392582197, 2021.2524677868648, 883.0389866890031, 1377.5526579757254], 
"eval_len": [265, 878, 1000, 36, 1000, 1000, 1000, 1000, 215, 345]}

 54%|█████▍    | 539999/1000000 [4:17:23<2:34:57, 49.48it/s]global step 540000, trans_decision ep_re 2743.1933016377043

{"global_step": 540000, "eval_re": [2070.5342089634873, 2186.6194244285575, 
3521.8495739271802, 3382.441757036465, 3625.135735035226, 4373.2632232245505, 
2909.910785003086, 333.1133024103541, 760.5206692643332, 4268.544337083804], 
"eval_len": [470, 537, 906, 1000, 1000, 1000, 1000, 92, 177, 1000]}

 55%|█████▍    | 549997/1000000 [4:22:13<2:35:06, 48.35it/s]global step 550000, trans_decision ep_re 2795.3062018128685

{"global_step": 550000, "eval_re": [4104.305694035106, 1280.7473107980215, 
101.68775342091573, 4209.101473660491, 3323.830134924602, 4093.953341142211, 
1121.700902918835, 4024.673968173986, 1804.118781092357, 3888.942657962159], 
"eval_len": [1000, 350, 50, 1000, 763, 1000, 316, 1000, 490, 1000]}

 56%|█████▌    | 559999/1000000 [4:27:03<2:28:16, 49.46it/s]global step 560000, trans_decision ep_re 3327.8029746011207

{"global_step": 560000, "eval_re": [1811.3539439356748, 4182.960435089576, 
1350.8603342396268, 4450.451420179759, 400.42543617765375, 4595.062778214576, 
4020.5715224054393, 4346.071124030274, 4185.950799062918, 3934.3219526757093], 
"eval_len": [472, 1000, 375, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569999/1000000 [4:31:53<2:24:40, 49.54it/s]global step 570000, trans_decision ep_re 2081.591013295706

{"global_step": 570000, "eval_re": [490.68523552223024, 368.80950015653747, 
1185.325091843789, 3044.3724183523095, 1101.2993821860025, 2661.3680879099034, 
4131.020637501918, 3490.871090864612, 844.992792465022, 3497.1658961547337], 
"eval_len": [164, 96, 291, 1000, 255, 1000, 1000, 1000, 1000, 725]}

 58%|█████▊    | 579999/1000000 [4:36:43<2:21:20, 49.52it/s]global step 580000, trans_decision ep_re 1950.5903937738447

{"global_step": 580000, "eval_re": [199.54312777970935, 3727.761878624459, 
1959.8523228617626, 4526.977086155091, 4228.280121703192, -34.32527829753202, 
908.4250179548629, 1314.4043092358863, 841.51030113542, 1833.4750505855973], 
"eval_len": [82, 1000, 538, 1000, 1000, 1000, 226, 303, 306, 1000]}

 59%|█████▉    | 589999/1000000 [4:41:23<2:18:19, 49.40it/s]global step 590000, trans_decision ep_re 2352.113168343108

{"global_step": 590000, "eval_re": [-35.402198237189275, 4025.888458080339, 
2724.1129970390043, 2751.356929046671, 4051.2460026488834, 3867.242960441591, 
3975.4209081151926, 562.8793503697732, 421.97959251335993, 1176.4066834134578], 
"eval_len": [1000, 1000, 677, 699, 1000, 1000, 1000, 1000, 97, 395]}

 60%|█████▉    | 599995/1000000 [4:46:13<2:14:40, 49.50it/s]global step 600000, trans_decision ep_re 2654.971015165346

{"global_step": 600000, "eval_re": [561.2261546900137, 674.5306475453597, 
4052.5564402720534, 1387.1824502646161, 1082.8609219366583, 3381.104340724066, 
3976.3350162452352, 3294.6519890814957, 4008.0421495374603, 4131.220041356501], 
"eval_len": [250, 275, 1000, 325, 370, 1000, 977, 1000, 1000, 1000]}

 61%|██████    | 609995/1000000 [4:51:03<2:11:27, 49.44it/s]global step 610000, trans_decision ep_re 2027.88692535882

{"global_step": 610000, "eval_re": [858.6828882189546, 1991.939555530562, 
1858.3080297329466, 4068.3597676604595, 761.242115728517, 4453.360939782799, 
167.012911752413, 1505.7384417018632, 3066.179432069692, 1548.045171409994], 
"eval_len": [223, 468, 522, 1000, 205, 1000, 89, 346, 641, 374]}

 62%|██████▏   | 619995/1000000 [4:55:44<2:08:30, 49.28it/s]global step 620000, trans_decision ep_re 2103.9468519452375

{"global_step": 620000, "eval_re": [711.785827092207, 3529.82511087978, 
218.86266620461382, 1924.8395815647636, 3413.604967049627, 1168.4783596359573, 
4668.56561494514, 846.3813360793514, 3074.7745710019994, 1482.3504849989317], 
"eval_len": [139, 1000, 63, 419, 1000, 329, 1000, 193, 1000, 461]}

 63%|██████▎   | 629999/1000000 [5:00:34<2:05:06, 49.29it/s]global step 630000, trans_decision ep_re 2733.5799756002175

{"global_step": 630000, "eval_re": [4134.704415573197, 2996.9826974673238, 
3444.29708934806, 1180.062615984663, 4212.28806745673, 2068.276394597958, 
572.4974694103477, 2394.1004760637456, 3921.319679347244, 2411.270850752904], 
"eval_len": [1000, 777, 1000, 423, 1000, 576, 122, 732, 1000, 557]}

 64%|██████▍   | 639997/1000000 [5:05:14<2:01:35, 49.35it/s]global step 640000, trans_decision ep_re 2894.122877276518

{"global_step": 640000, "eval_re": [3880.5772543865273, 3785.7514878114853, 
4007.3504639022635, 1410.0221726225695, 1121.0435072055398, 1046.05905676672, 
1331.9555947504437, 4103.660792608176, 4590.066689783514, 3664.7417529279414], 
"eval_len": [1000, 981, 1000, 390, 263, 274, 419, 1000, 1000, 863]}

 65%|██████▍   | 649997/1000000 [5:10:04<1:57:48, 49.52it/s]global step 650000, trans_decision ep_re 3045.925654559685

{"global_step": 650000, "eval_re": [1324.5968468821372, 3301.1647984662186, 
3817.817165149524, 1578.3501676461326, 1377.0560493458206, 3563.708949715656, 
4334.567014685546, 4340.645209194707, 3626.9172825958735, 3194.4330619152306], 
"eval_len": [376, 777, 1000, 521, 347, 1000, 967, 1000, 1000, 766]}

 66%|██████▌   | 659997/1000000 [5:14:54<1:54:39, 49.42it/s]global step 660000, trans_decision ep_re 2900.667514681888

{"global_step": 660000, "eval_re": [3144.4406013597663, 4125.040748997185, 
880.4161546332749, 4035.668390986692, 205.15507812128155, 3814.4679431899795, 
3653.6682113473007, 4017.8281130863284, 4502.351749206647, 627.6381558904262], 
"eval_len": [740, 1000, 1000, 1000, 65, 826, 1000, 1000, 1000, 235]}

 67%|██████▋   | 669995/1000000 [5:19:44<1:51:56, 49.13it/s]global step 670000, trans_decision ep_re 2045.3979751244751

{"global_step": 670000, "eval_re": [3948.45715200765, 1464.1886711265672, 
232.99785622666153, 2116.1428267667393, 1667.3534076521316, 129.3377224501682, 
1587.5552736181755, 3824.911924406048, 2610.4145069934066, 2872.6204099972006], 
"eval_len": [905, 369, 122, 571, 1000, 48, 417, 895, 681, 766]}

 68%|██████▊   | 679995/1000000 [5:24:24<1:47:38, 49.55it/s]global step 680000, trans_decision ep_re 3979.564790596685

{"global_step": 680000, "eval_re": [3760.1510150830713, 4260.513461616496, 
3889.690473643137, 3369.004411005322, 4104.200580555296, 3780.209305245386, 
4491.481874179051, 4000.033511747983, 3710.6592364216526, 4429.704036469456], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689997/1000000 [5:29:24<1:44:40, 49.36it/s]global step 690000, trans_decision ep_re 3375.037199321193

{"global_step": 690000, "eval_re": [3017.6181306696635, 3721.100268387449, 
2826.332590343681, 1965.4618397265594, 2690.5010194046004, 3526.6186489906004, 
3812.995378488818, 4329.6860065348055, 3797.7288149742785, 4062.329295691477], 
"eval_len": [1000, 1000, 707, 1000, 638, 904, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699999/1000000 [5:34:14<2:42:20, 30.80it/s]global step 700000, trans_decision ep_re 1984.8482082586222

{"global_step": 700000, "eval_re": [100.46051669020177, 4000.908750166993, 
385.6457140662541, 4103.871958414863, 4103.141485637739, 1216.262590002091, 
711.2046064655946, 478.2538355553155, 1270.8267039098541, 3477.9059216773157], 
"eval_len": [53, 898, 131, 1000, 1000, 298, 152, 150, 447, 1000]}

 71%|███████   | 709999/1000000 [5:38:55<1:37:37, 49.51it/s]global step 710000, trans_decision ep_re 2387.703460494265

{"global_step": 710000, "eval_re": [784.286510190942, 3824.9484829758235, 
2178.1566515794684, 171.02454380348945, 4337.481463664607, 3682.325584414941, 
1022.3496588404338, 1570.0833094168397, 2192.398686588224, 4113.979713467881], 
"eval_len": [230, 1000, 628, 60, 1000, 1000, 1000, 352, 1000, 1000]}

 72%|███████▏  | 719997/1000000 [5:43:45<1:34:44, 49.25it/s]global step 720000, trans_decision ep_re 2038.388033922968

{"global_step": 720000, "eval_re": [605.387087623445, 2353.8596709355293, 
3641.485613553796, 485.57890752657266, 1026.0080806011836, 4072.8500487401366, 
4160.564614440518, 3502.4445098223255, 250.13156473670713, 285.57024124946366], 
"eval_len": [188, 606, 936, 130, 299, 1000, 1000, 1000, 68, 88]}

 73%|███████▎  | 729997/1000000 [5:48:35<1:32:13, 48.80it/s]global step 730000, trans_decision ep_re 2515.9998740803926

{"global_step": 730000, "eval_re": [3843.65022725884, 2971.8203109451315, 
1775.284362258097, 224.93348300570852, 4103.031597674166, 3938.973346588576, 
3170.1118300616913, 568.2576938742893, 3887.662889510172, 676.2729996272533], 
"eval_len": [1000, 754, 466, 65, 1000, 975, 1000, 259, 1000, 172]}

 74%|███████▍  | 739999/1000000 [5:53:15<1:28:15, 49.10it/s]global step 740000, trans_decision ep_re 1797.0901650806343

{"global_step": 740000, "eval_re": [1183.9275724704037, 237.0417672817925, 
2882.14557463502, 994.0015652117005, 292.40697061409907, 910.882921416524, 
3743.427369726644, 381.81719974899255, 3295.8978019288465, 4049.3529077723197], 
"eval_len": [1000, 120, 746, 281, 87, 252, 1000, 112, 1000, 1000]}

 75%|███████▍  | 749999/1000000 [5:58:05<1:24:40, 49.20it/s]global step 750000, trans_decision ep_re 1280.681240801673

{"global_step": 750000, "eval_re": [2274.7512490543913, 784.661544281478, 
201.22418100684658, 1018.1545441289338, 2213.9084794877926, 3572.473263888231, 
158.07120435518647, 589.3066743857505, 652.7910985134397, 1341.4701689146802], 
"eval_len": [568, 1000, 90, 267, 652, 836, 53, 1000, 188, 417]}

 76%|███████▌  | 759997/1000000 [6:02:45<1:20:59, 49.38it/s]global step 760000, trans_decision ep_re 2210.280627897325

{"global_step": 760000, "eval_re": [4153.116270556652, 465.11446382408525, 
1864.6756958244466, 1970.4328668569783, 3608.7358737426707, 3020.304422861076, 
442.0116881316803, 862.4128628167019, 4478.869372827075, 1237.1327615318887], 
"eval_len": [1000, 118, 491, 454, 879, 820, 158, 374, 1000, 285]}

 77%|███████▋  | 769999/1000000 [6:07:25<1:17:43, 49.32it/s]global step 770000, trans_decision ep_re 2553.6145141583784

{"global_step": 770000, "eval_re": [2315.9662748531987, 4039.2398703482763, 
1238.4360588940071, 782.6350748301833, 3494.2105442394122, 4003.7358181265813, 
1196.7691754120353, 746.3604403252456, 3738.8175763232425, 3979.974308231602], 
"eval_len": [1000, 1000, 329, 345, 1000, 1000, 283, 165, 1000, 1000]}

 78%|███████▊  | 779999/1000000 [6:12:15<1:14:35, 49.16it/s]global step 780000, trans_decision ep_re 2589.968177078916

{"global_step": 780000, "eval_re": [961.0173563620444, 273.83161042896666, 
3702.6992390918044, 1188.6255517422865, 3914.748051091055, 4120.580734654497, 
3294.428966241845, 2523.988205022529, 2128.7467638766893, 3791.015292277444], 
"eval_len": [1000, 69, 1000, 1000, 1000, 1000, 1000, 700, 545, 1000]}

 79%|███████▉  | 789999/1000000 [6:17:05<1:11:07, 49.21it/s]global step 790000, trans_decision ep_re 2403.5383240696247

{"global_step": 790000, "eval_re": [4124.477811949599, 3889.4287252576214, 
557.69914085811, 2638.5289589068543, 3739.8387445755184, 1199.7203136127082, 
238.48719923550732, 1432.2706169542787, 1966.7626258303987, 4248.1691035156555],
"eval_len": [1000, 1000, 189, 639, 1000, 300, 62, 333, 513, 1000]}

 80%|███████▉  | 799997/1000000 [6:21:45<1:07:38, 49.28it/s]global step 800000, trans_decision ep_re 3214.2424272189296

{"global_step": 800000, "eval_re": [4439.940558721263, 3579.5899121867733, 
35.18369308494713, 3937.201408672678, 24.434312837391825, 4571.1303481668065, 
2155.8689937945146, 4517.429143709, 4428.809636687791, 4452.836264328134], 
"eval_len": [1000, 1000, 22, 1000, 19, 1000, 459, 1000, 1000, 1000]}

 81%|████████  | 809995/1000000 [6:26:35<1:04:01, 49.46it/s]global step 810000, trans_decision ep_re 2635.0656707489015

{"global_step": 810000, "eval_re": [224.354311660558, 3457.7413691316515, 
1352.8193243815917, 3955.146087342603, 1820.3153276374412, 3742.331354369059, 
1404.8799373391876, 4060.008041924199, 3694.690020419261, 2638.3709332834624], 
"eval_len": [61, 1000, 330, 1000, 598, 1000, 355, 1000, 1000, 877]}

 82%|████████▏ | 819997/1000000 [6:31:25<1:01:09, 49.06it/s]global step 820000, trans_decision ep_re 3000.319391934736

{"global_step": 820000, "eval_re": [3382.376305707384, 3653.691117193292, 
3993.2402374360613, 1310.1119454507905, 3828.2357472981726, 3771.3435547871045, 
1776.0206979448535, 4237.670266253656, 3423.140247052305, 627.3638002237367], 
"eval_len": [908, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 881, 191]}

 83%|████████▎ | 829997/1000000 [6:36:15<57:35, 49.20it/s]global step 830000, trans_decision ep_re 3069.8333756339593

{"global_step": 830000, "eval_re": [4505.463518051677, 3583.609697456056, 
3478.8486816029144, 824.4754899920111, 4216.716965113681, 4262.814685267198, 
3477.074913236378, 1595.6482461461333, 3572.1638017607934, 1181.5177577127558], 
"eval_len": [1000, 1000, 1000, 306, 987, 1000, 1000, 359, 1000, 306]}

 84%|████████▍ | 839999/1000000 [6:41:05<54:41, 48.76it/s]global step 840000, trans_decision ep_re 2527.870587520019

{"global_step": 840000, "eval_re": [3074.770890454975, 659.8084181778258, 
3442.8468028762986, 3570.9358692035275, 927.7393265249735, 3895.37351483704, 
4312.000088025993, 203.1897692135788, 4063.19576781039, 1128.8454280755866], 
"eval_len": [1000, 209, 848, 1000, 244, 1000, 1000, 106, 1000, 349]}

 85%|████████▍ | 849995/1000000 [6:45:55<50:45, 49.25it/s]global step 850000, trans_decision ep_re 2331.276829226794

{"global_step": 850000, "eval_re": [2034.8475410415153, 3166.850911068594, 
4031.888943711564, 906.5337069495191, 2097.1521960269083, 2453.2504461917706, 
3943.721811406909, 837.9312145585895, 1445.4852798757659, 2395.106241436802], 
"eval_len": [1000, 1000, 996, 224, 524, 628, 1000, 245, 395, 597]}

 86%|████████▌ | 859995/1000000 [6:50:35<46:28, 50.21it/s]global step 860000, trans_decision ep_re 1813.0406037190023

{"global_step": 860000, "eval_re": [1227.3735047092102, -864.9384530723521, 
921.3988825402599, 1364.1663395028781, 1689.6437898504316, 4179.0127941399805, 
89.79523935104126, 2019.000456327815, 4146.189106248988, 3358.764377591771], 
"eval_len": [1000, 1000, 1000, 310, 465, 1000, 145, 1000, 1000, 1000]}

 87%|████████▋ | 869995/1000000 [6:55:25<43:22, 49.95it/s]global step 870000, trans_decision ep_re 2367.4548278889843

{"global_step": 870000, "eval_re": [3938.3324240108464, 1624.541091659552, 
872.6645050186542, 4126.14447257002, 1819.200789531342, 3783.092104683589, 
1348.2805196667116, 1193.1708334167554, 4806.179004261768, 162.94253407059998], 
"eval_len": [1000, 361, 200, 1000, 432, 1000, 375, 1000, 1000, 62]}

 88%|████████▊ | 879995/1000000 [7:00:05<39:56, 50.08it/s]global step 880000, trans_decision ep_re 2404.2246403553104

{"global_step": 880000, "eval_re": [1957.523511561036, 4604.827925167007, 
703.7997219240293, 3980.015150291051, 830.2979257490136, 1479.5809213129542, 
628.6727119224436, 4143.988154110038, 3817.1719261028234, 1896.3684554127112], 
"eval_len": [1000, 1000, 351, 1000, 220, 383, 172, 1000, 1000, 444]}

 89%|████████▉ | 889995/1000000 [7:04:45<36:46, 49.86it/s]global step 890000, trans_decision ep_re 2613.568075597233

{"global_step": 890000, "eval_re": [3795.352745822803, 4347.090003430355, 
3404.326486965109, 3799.284357002547, 399.4552444059559, 923.4656897325776, 
3416.6820717607943, 1377.6839002242994, 1264.072041854808, 3408.268214773083], 
"eval_len": [1000, 1000, 1000, 1000, 108, 249, 1000, 330, 381, 771]}

 90%|████████▉ | 899997/1000000 [7:09:25<34:46, 47.94it/s]global step 900000, trans_decision ep_re 2497.8974810319946

{"global_step": 900000, "eval_re": [4157.300235418466, 1251.124993429386, 
430.36087682011083, 2898.0068031379456, 3290.511362762127, 693.5814703682673, 
4344.738057933742, 960.9581322807392, 4509.357922233902, 2443.034955935256], 
"eval_len": [1000, 344, 272, 779, 1000, 148, 1000, 322, 1000, 593]}

 91%|█████████ | 909997/1000000 [7:14:05<30:03, 49.90it/s]global step 910000, trans_decision ep_re 2500.2558415940807

{"global_step": 910000, "eval_re": [649.3391544553115, 2408.530231543662, 
3148.9727027331464, 306.9199879829707, 1371.6793101063402, 1788.0525839723382, 
3682.944831648703, 3860.9637838696385, 3573.178738437946, 4211.977091190752], 
"eval_len": [156, 657, 820, 1000, 350, 420, 1000, 1000, 881, 1000]}

 92%|█████████▏| 919997/1000000 [7:18:55<26:36, 50.10it/s]global step 920000, trans_decision ep_re 1659.8568415975656

{"global_step": 920000, "eval_re": [2714.778929142714, 3241.4371858573923, 
2798.0666326823293, 784.6277224025591, -324.2596672610375, 773.2777955059292, 
2143.480952420271, 1260.0466786461438, 1096.0149726951138, 2111.0972138842426], 
"eval_len": [642, 1000, 1000, 1000, 1000, 189, 713, 386, 277, 573]}

 93%|█████████▎| 929999/1000000 [7:23:18<23:17, 50.08it/s]global step 930000, trans_decision ep_re 821.2541347414004

{"global_step": 930000, "eval_re": [1329.791276736032, 401.014782252665, 
436.5802841332386, 1472.0243287857247, 1554.8817529058533, 378.1217143642577, 
575.7137485163711, 124.10680410019484, 511.29649488795076, 1429.010160731716], 
"eval_len": [1000, 102, 110, 474, 1000, 118, 159, 78, 170, 351]}

 94%|█████████▍| 939999/1000000 [7:28:05<20:04, 49.81it/s]global step 940000, trans_decision ep_re 809.4180239214123

{"global_step": 940000, "eval_re": [290.2576109844067, 416.27544446609176, 
125.26055242418518, 2323.8370539969765, 903.1847295479718, 1318.92485661939, 
1017.4136574177062, 910.3463966716113, 668.2082512486849, 120.47168583709907], 
"eval_len": [83, 177, 52, 602, 1000, 352, 1000, 1000, 199, 44]}

 95%|█████████▍| 949997/1000000 [7:32:45<18:15, 45.63it/s]global step 950000, trans_decision ep_re 1464.1759222374737

{"global_step": 950000, "eval_re": [1287.311159621504, 545.5597178198323, 
485.267575752405, 869.02553671633, 3823.9934135240665, 405.3091559164959, 
364.52041012787106, 4320.899764415782, 1507.0787427540013, 1032.7937457264495], 
"eval_len": [1000, 1000, 239, 1000, 1000, 129, 111, 1000, 377, 297]}

 96%|█████████▌| 959997/1000000 [7:37:25<25:54, 25.74it/s]global step 960000, trans_decision ep_re 2323.1062455052515

{"global_step": 960000, "eval_re": [2908.7281326115426, 4074.4398390835154, 
2717.2516662862213, 1665.2636077007462, 1154.5614543895535, 2622.5723692145675, 
3523.6006915261246, 4095.9664268918764, 284.50814857451564, 184.17011877385366],
"eval_len": [1000, 847, 773, 411, 360, 775, 1000, 1000, 852, 74]}

 97%|█████████▋| 969995/1000000 [7:42:27<10:06, 49.46it/s]global step 970000, trans_decision ep_re 3316.2582510168054

{"global_step": 970000, "eval_re": [3852.3010195547317, 4352.889710688483, 
4611.653631839945, 1132.51874292187, 978.9679603706591, 3137.678205215864, 
4068.736253895493, 3565.9733114418027, 3451.9545670958755, 4009.9091071433268], 
"eval_len": [1000, 1000, 1000, 243, 278, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979995/1000000 [7:47:17<06:43, 49.63it/s]global step 980000, trans_decision ep_re 2696.6117199122045

{"global_step": 980000, "eval_re": [3552.30585084504, 3395.100423647084, 
3849.326825674853, 972.2569518544752, 748.6739606926536, 1591.5022802694448, 
3492.483583960834, 3171.481817344422, 2151.8685370160183, 4041.11696781722], 
"eval_len": [1000, 1000, 1000, 253, 203, 411, 1000, 1000, 551, 1000]}

 99%|█████████▉| 989997/1000000 [7:52:08<03:22, 49.41it/s]global step 990000, trans_decision ep_re 1482.871436748837

{"global_step": 990000, "eval_re": [50.594489943192706, 2427.781947050501, 
3046.9588282189607, 2735.327814790545, 1523.3318892832785, 318.33288468370785, 
198.9572282480683, 1324.6773737572585, 522.467691418536, 2680.2842200943205], 
"eval_len": [32, 727, 1000, 1000, 1000, 129, 76, 321, 161, 715]}

100%|█████████▉| 999997/1000000 [7:56:48<00:00, 49.68it/s]global step 1000000, trans_decision ep_re 1883.0901341253327

{"global_step": 1000000, "eval_re": [961.8799357365529, 3858.5253266084314, 
770.1002308924521, 1826.0952634427945, 625.6449802124966, 957.03251687779, 
3779.775433767139, 1729.5335140054085, 1873.7590296821252, 2448.5551100281377], 
"eval_len": [233, 1000, 164, 402, 149, 1000, 1000, 492, 1000, 581]}

100%|██████████| 1000000/1000000 [7:56:54<00:00, 34.95it/s]
