
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 2,
    'delayspec': 'ExtremeClogL1U23::markov(ord(15,1), ord(3,5,3,shift=22), 
[[124, 1], [1, 19]])'
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9995/1000000 [03:00<5:46:56, 47.56it/s]global step 10000, trans_decision ep_re 397.6948849895785

{"global_step": 10000, "eval_re": [29.044641462375708, 18.78601460655838, 
635.1473590302877, 633.1202810825006, 598.5884016325991, 627.0032372798315, 
672.6542644381171, 675.1109001874138, 3.4572970042994164, 84.03645317180165], 
"eval_len": [38, 35, 1000, 1000, 1000, 1000, 1000, 1000, 25, 125]}

  2%|▏         | 19998/1000000 [08:30<7:32:03, 36.13it/s]global step 20000, trans_decision ep_re 653.1358533755629

{"global_step": 20000, "eval_re": [689.4974743105626, 685.3700175919421, 
724.2930773320426, 710.4641972509042, 714.6935266868737, 693.4878152018233, 
678.2655759043577, 225.4791935305299, 704.5805917006821, 705.2270642459105], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 382, 1000, 1000]}

  3%|▎         | 29999/1000000 [14:00<5:23:17, 50.01it/s]global step 30000, trans_decision ep_re 690.7241247943073

{"global_step": 30000, "eval_re": [668.0630347550365, 224.8610091404803, 
810.165068562154, 729.9351313578504, 768.4659386729487, 729.9990453094409, 
716.4988199184085, 521.7227328394073, 898.4870864514517, 839.043380935895], 
"eval_len": [1000, 253, 1000, 1000, 1000, 1000, 1000, 730, 1000, 1000]}

  4%|▍         | 39999/1000000 [19:40<5:18:31, 50.23it/s]global step 40000, trans_decision ep_re 890.8887538077946

{"global_step": 40000, "eval_re": [896.164562211897, 910.6785574123267, 
890.3115273687738, 826.9861732969664, 920.6870859310302, 919.152530043649, 
916.195588063897, 779.9559513162897, 943.8210881881633, 904.9344742449529], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49996/1000000 [25:20<5:31:05, 47.82it/s]global step 50000, trans_decision ep_re 897.4172479099677

{"global_step": 50000, "eval_re": [1009.2804187340525, 822.0630722372904, 
1083.0952137122465, 821.4388711172966, 772.2945978516482, 1002.014088997581, 
785.0076692618844, 914.0131240378655, 823.0154239003587, 941.9499992494548], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59999/1000000 [30:50<5:47:21, 45.10it/s]global step 60000, trans_decision ep_re 770.6734778252201

{"global_step": 60000, "eval_re": [1383.0802294973087, 1353.8499272421075, 
771.2337373646302, 948.9280388458704, 64.60079953510885, 888.9444078843633, 
360.65079885231194, 576.2016854526839, 171.7480774041574, 1187.4970761736583], 
"eval_len": [1000, 1000, 597, 703, 42, 1000, 282, 494, 134, 1000]}

  7%|▋         | 69995/1000000 [36:00<5:08:10, 50.30it/s]global step 70000, trans_decision ep_re 581.4144515623948

{"global_step": 70000, "eval_re": [1013.4858836536305, 225.29750776393723, 
366.96995284657663, 1024.4763482570106, 865.7978024226289, 733.3105017409873, 
821.2951996760128, 213.17271449652281, 515.6009587245884, 34.737646042053065], 
"eval_len": [1000, 156, 254, 1000, 1000, 487, 1000, 186, 327, 30]}

  8%|▊         | 79998/1000000 [41:20<5:33:39, 45.96it/s]global step 80000, trans_decision ep_re 1002.0620992164434

{"global_step": 80000, "eval_re": [1221.0639339842432, 2366.0828356617935, 
1072.1316121777643, 896.3381390220441, 1544.5492008550202, 710.1755362834558, 
323.669369761124, 103.89244845551273, 697.5646555606318, 1085.1532604028425], 
"eval_len": [640, 1000, 545, 461, 701, 1000, 176, 51, 1000, 448]}

  9%|▉         | 89999/1000000 [46:40<7:13:24, 34.99it/s]global step 90000, trans_decision ep_re 1395.1596846164407

{"global_step": 90000, "eval_re": [2137.919406254246, 1206.3219343612595, 
2088.38066272827, 1296.4127434929408, 1179.2232165049247, 1532.2811172023619, 
886.9267627612179, 671.448183919925, 2165.255527555033, 787.4272913842293], 
"eval_len": [1000, 607, 1000, 626, 1000, 717, 1000, 235, 1000, 451]}

 10%|▉         | 99997/1000000 [52:00<6:57:07, 35.96it/s]global step 100000, trans_decision ep_re 1246.723475715092

{"global_step": 100000, "eval_re": [2211.8013473603246, 536.3970873076427, 
2088.900639984486, 1190.606388955315, 1293.4972885092884, 1071.2253475033947, 
1082.0920039584603, 422.8561637118998, 1670.8765482477286, 898.9819416123784], 
"eval_len": [873, 284, 1000, 545, 1000, 1000, 1000, 218, 1000, 1000]}

 11%|█         | 109999/1000000 [57:40<6:42:37, 36.84it/s]global step 110000, trans_decision ep_re 1554.0746871397225

{"global_step": 110000, "eval_re": [2742.8527689787056, 1848.7895927886102, 
1095.7484241404327, 815.7539685297601, 203.18166373157462, 1300.0005992861559, 
2502.6060891634374, 1837.8655116977486, 1274.5705457385761, 1919.3777073422248],
"eval_len": [1000, 1000, 553, 406, 99, 1000, 1000, 1000, 1000, 785]}

 12%|█▏        | 119997/1000000 [1:03:10<5:12:33, 46.92it/s]global step 120000, trans_decision ep_re 2374.0553088960946

{"global_step": 120000, "eval_re": [2931.222808568006, 3027.107985848221, 
2738.1299912000845, 1432.8626427003435, 2704.9269579688944, 1780.76707553722, 
2492.2019323744853, 3153.860728443981, 871.2308167433744, 2608.2421495763415], 
"eval_len": [1000, 1000, 895, 564, 1000, 575, 1000, 1000, 274, 1000]}

 13%|█▎        | 129996/1000000 [1:08:50<6:36:09, 36.60it/s]global step 130000, trans_decision ep_re 2294.1890667746616

{"global_step": 130000, "eval_re": [2924.989045623667, 2788.081786861315, 
2631.6824536064873, 559.8699526214516, 2719.668126607277, 2597.35528620685, 
2596.4800175913697, 2520.6894429172144, 2337.9951771610336, 1265.079378549953], 
"eval_len": [1000, 1000, 1000, 183, 1000, 1000, 1000, 844, 1000, 1000]}

 14%|█▍        | 139997/1000000 [1:14:30<5:33:04, 43.03it/s]global step 140000, trans_decision ep_re 2352.62383967627

{"global_step": 140000, "eval_re": [1386.0675177776805, 2645.9544347323285, 
2498.0670572877425, 2244.3122182136262, 2659.320275878112, 2394.219629008428, 
2407.6105756427028, 2033.6443206062215, 2325.383310290986, 2931.6590573248727], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149996/1000000 [1:20:10<6:23:08, 36.98it/s]global step 150000, trans_decision ep_re 1953.6098504465458

{"global_step": 150000, "eval_re": [1329.1613155542875, 1547.0022728575345, 
2612.7849212052265, 1022.6967714064314, 2754.161930867367, 2984.7774348462376, 
1916.5403711080933, 603.6956504429513, 2934.0266689948735, 1831.2511671824586], 
"eval_len": [436, 556, 1000, 289, 1000, 1000, 685, 277, 1000, 640]}

 16%|█▌        | 159996/1000000 [1:25:30<5:06:03, 45.74it/s]global step 160000, trans_decision ep_re 1229.7703025485657

{"global_step": 160000, "eval_re": [240.5463342356302, 86.43527946067626, 
501.9446309821387, 776.3163635007303, 1898.1219596872265, 908.4152061678, 
2416.0500337883013, 2569.266205654841, 1096.9058480081096, 1803.7011640002024], 
"eval_len": [82, 44, 180, 305, 658, 404, 936, 797, 428, 1000]}

 17%|█▋        | 169995/1000000 [1:31:00<5:41:17, 40.53it/s]global step 170000, trans_decision ep_re 2596.3433368078977

{"global_step": 170000, "eval_re": [2962.8114398567086, 2614.152659249312, 
2529.5725058928315, 2674.4422200326103, 2523.838323355928, 2752.3649265877966, 
2291.6561526139176, 2800.886572615426, 2727.4846215337893, 2086.223946340654], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 926, 946, 1000, 1000]}

 18%|█▊        | 179997/1000000 [1:36:20<4:50:37, 47.03it/s]global step 180000, trans_decision ep_re 1978.9342836615099

{"global_step": 180000, "eval_re": [1699.9812942542453, 187.27974781872555, 
2973.973514757274, 2689.6966938964224, 3482.8034923696373, 2394.68717109529, 
806.6184701322952, 1127.2197627494998, 2801.9213186341467, 1625.1613709075623], 
"eval_len": [1000, 76, 1000, 1000, 1000, 801, 348, 454, 1000, 1000]}

 19%|█▉        | 189997/1000000 [1:42:00<4:28:22, 50.30it/s]global step 190000, trans_decision ep_re 1528.4450269369593

{"global_step": 190000, "eval_re": [743.912594944264, 2144.6015946062707, 
2163.5334980106086, 499.44948833690734, 95.33438497460422, 2551.367502342497, 
1888.3412074553128, 2567.344238769391, 2365.200073944337, 265.36568598539895], 
"eval_len": [404, 837, 843, 182, 49, 1000, 1000, 1000, 1000, 98]}

 20%|█▉        | 199995/1000000 [1:47:20<4:25:10, 50.28it/s]global step 200000, trans_decision ep_re 2152.332243506321

{"global_step": 200000, "eval_re": [1087.6015787738322, 3222.808621208104, 
2838.309875044508, 2757.197451801383, 2524.954171904537, 2886.8066184543827, 
2795.8246337878113, 532.3486102646294, 1261.3112472671373, 1616.159626556886], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 189, 1000, 561]}

 21%|██        | 209999/1000000 [1:53:01<4:22:29, 50.16it/s]global step 210000, trans_decision ep_re 2795.539225895091

{"global_step": 210000, "eval_re": [3044.203014552198, 2942.1672274541734, 
3007.3269503064794, 3110.47610169098, 3022.890488307636, 3081.7473020409257, 
3157.6188089031343, 173.01011358183436, 3197.162897035951, 3218.7893550775943], 
"eval_len": [1000, 1000, 904, 1000, 1000, 1000, 1000, 84, 1000, 1000]}

 22%|██▏       | 219999/1000000 [1:58:41<4:18:51, 50.22it/s]global step 220000, trans_decision ep_re 1848.7223542947327

{"global_step": 220000, "eval_re": [247.3368598435013, 1785.2773508731595, 
385.1818800022461, 2822.7699474524916, 3185.285500830539, 890.3143009202813, 
3245.579419034214, 126.74601395600152, 2573.4148671631083, 3225.317402871781], 
"eval_len": [91, 1000, 131, 1000, 1000, 311, 1000, 62, 1000, 1000]}

 23%|██▎       | 229997/1000000 [2:04:01<4:15:49, 50.17it/s]global step 230000, trans_decision ep_re 2244.465955533515

{"global_step": 230000, "eval_re": [3230.9757570238075, 1612.1531053971653, 
3015.0655637696705, 3156.440793985676, 1538.7974967706066, 812.7547857506543, 
3199.0126226150874, 2249.506110953074, 737.0603291981902, 2892.8929898712195], 
"eval_len": [1000, 472, 1000, 1000, 1000, 243, 1000, 1000, 211, 1000]}

 24%|██▍       | 239995/1000000 [2:09:21<4:28:23, 47.19it/s]global step 240000, trans_decision ep_re 2471.2915558149825

{"global_step": 240000, "eval_re": [2594.4644084523243, 3331.5692623612963, 
3375.3647082850025, 1271.7433883100239, 2806.9380165144403, 2956.963871062536, 
2463.518997372975, 146.03170151067513, 2694.7506537714407, 3071.5705505091128], 
"eval_len": [1000, 1000, 1000, 537, 1000, 1000, 1000, 132, 1000, 1000]}

 25%|██▍       | 249996/1000000 [2:14:41<5:40:37, 36.70it/s]global step 250000, trans_decision ep_re 2952.4760562327783

{"global_step": 250000, "eval_re": [2974.35464641425, 2250.8170638606116, 
3150.4209849562935, 2789.690965492587, 3487.1505480328983, 2986.1776727946612, 
3262.9480959979824, 3274.2146286446623, 3198.5893589101515, 2150.3965972236847],
"eval_len": [1000, 731, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 746]}

 26%|██▌       | 259998/1000000 [2:20:11<5:33:44, 36.95it/s]global step 260000, trans_decision ep_re 3102.2605502373726

{"global_step": 260000, "eval_re": [3263.752366311134, 3010.291124192979, 
3158.339517521605, 3462.495086532329, 3149.6352238555273, 3002.907202320615, 
2523.5929673364376, 2969.5789029309854, 3143.453943882196, 3338.5591674899197], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 770, 1000, 1000, 1000]}

 27%|██▋       | 269998/1000000 [2:25:41<4:15:56, 47.54it/s]global step 270000, trans_decision ep_re 2264.0277143383933

{"global_step": 270000, "eval_re": [2750.9058207974267, 2197.4773611332944, 
866.2104758517468, 2563.804661245712, 2001.175754385485, 2854.7347182547624, 
164.7203030319782, 3370.1232171877054, 2838.0373693881606, 3033.087462107666], 
"eval_len": [1000, 1000, 288, 834, 702, 930, 96, 1000, 1000, 1000]}

 28%|██▊       | 279996/1000000 [2:30:51<5:28:02, 36.58it/s]global step 280000, trans_decision ep_re 1552.2559874749518

{"global_step": 280000, "eval_re": [2730.446742463547, 940.8671136014814, 
1340.5129015408872, 2594.5423887826587, 312.9522460204588, 3198.0296304888234, 
3231.7692791691306, 213.77148828413326, 336.7328835150872, 622.9352008833118], 
"eval_len": [783, 293, 468, 1000, 128, 1000, 1000, 184, 105, 169]}

 29%|██▉       | 289999/1000000 [2:35:51<4:49:42, 40.85it/s]global step 290000, trans_decision ep_re 2197.686733652964

{"global_step": 290000, "eval_re": [2399.260209548162, 2890.66037526839, 
2000.4307441192789, 1641.1681200220225, 2597.157862065502, 3190.1373316889217, 
1316.5068358200665, 3185.0721924257796, 2620.414354782254, 136.05931078926037], 
"eval_len": [853, 1000, 604, 512, 1000, 1000, 1000, 1000, 775, 56]}

 30%|██▉       | 299998/1000000 [2:41:11<4:04:04, 47.80it/s]global step 300000, trans_decision ep_re 2835.615050898844

{"global_step": 300000, "eval_re": [2883.5816276930004, 3090.091067683822, 
3059.685779823921, 2985.144691991098, 3006.4988764069694, 3412.468984448003, 
1323.3271334804933, 2461.6496060912837, 3244.351911927673, 2889.3508294421767], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309997/1000000 [2:46:21<3:49:08, 50.19it/s]global step 310000, trans_decision ep_re 3009.614278806409

{"global_step": 310000, "eval_re": [2749.6350789642465, 3221.067097697416, 
2156.635838738693, 3019.939194013517, 3047.1183094482753, 3033.7629157287397, 
3119.264030438408, 3339.0371932285316, 3182.593357273658, 3227.0897725326026], 
"eval_len": [1000, 1000, 791, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319995/1000000 [2:51:21<3:47:28, 49.82it/s]global step 320000, trans_decision ep_re 2774.7490884262356

{"global_step": 320000, "eval_re": [3176.9859067324496, 3401.978765482945, 
2833.6088745807506, 294.46511815099433, 2836.9818117610976, 3046.542951020487, 
3307.652968112979, 2820.743489140075, 3037.7806828796934, 2990.7503164008845], 
"eval_len": [1000, 1000, 1000, 140, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329997/1000000 [2:55:53<3:43:17, 50.01it/s]global step 330000, trans_decision ep_re 1203.9690374121233

{"global_step": 330000, "eval_re": [1936.5165773218232, 201.61230228751276, 
3036.786416725123, 529.6078321677608, 2038.7322110218197, 80.78150382601663, 
65.42173235017736, 666.2286461849071, 710.9891202491457, 2773.014031986946], 
"eval_len": [684, 69, 1000, 209, 617, 42, 42, 193, 217, 789]}

 34%|███▍      | 339999/1000000 [3:00:41<3:39:51, 50.03it/s]global step 340000, trans_decision ep_re 2621.715518616423

{"global_step": 340000, "eval_re": [2816.2286959440125, 2548.057926524475, 
1666.202269072307, 2690.0620884227596, 2570.212342157228, 2592.298297910277, 
3435.4758798684097, 2382.951403503293, 2564.0270595342145, 2951.639223227256], 
"eval_len": [1000, 1000, 666, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349999/1000000 [3:05:41<3:59:51, 45.17it/s]global step 350000, trans_decision ep_re 370.8152614201396

{"global_step": 350000, "eval_re": [-732.6773780059008, 441.65595459880177, 
1333.346229233964, -271.5044175425184, 186.7672300356028, 897.26248977435, 
-1201.6505771377988, 1387.2512608739805, 117.73254953750799, 1549.969272833407],
"eval_len": [1000, 1000, 464, 1000, 62, 1000, 1000, 1000, 55, 1000]}

 36%|███▌      | 359995/1000000 [3:11:11<3:39:10, 48.67it/s]global step 360000, trans_decision ep_re -57.617546589953825

{"global_step": 360000, "eval_re": [-628.2037294790279, 63.31229699972498, 
59.526554138325224, 19.184883565002508, 19.920477051822616, 8.614682379608642, 
0.6769888786507998, 5.952885147715083, -118.26014213252498, -6.900362448835214],
"eval_len": [1000, 70, 63, 61, 41, 12, 111, 22, 1000, 19]}

 37%|███▋      | 369999/1000000 [3:16:01<3:43:19, 47.02it/s]global step 370000, trans_decision ep_re 1888.8569386598072

{"global_step": 370000, "eval_re": [1717.1535385414313, 564.1800688344389, 
2149.4352395556743, 2506.133281935335, 970.5426271815065, 1517.6104809935528, 
2001.9459280654746, 2770.5594344182878, 2319.505651472908, 2371.50313559946], 
"eval_len": [735, 1000, 978, 1000, 416, 545, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379997/1000000 [3:21:41<3:40:07, 46.94it/s]global step 380000, trans_decision ep_re 1799.3541687143047

{"global_step": 380000, "eval_re": [604.679515807235, 3283.699821946424, 
1917.4979804780835, 2300.294497981446, 3147.4875015448397, 821.2577908232952, 
1674.007523480004, 2724.3382749262596, 1257.6197664564074, 262.6590136990513], 
"eval_len": [427, 1000, 576, 1000, 1000, 262, 600, 876, 421, 194]}

 39%|███▉      | 389995/1000000 [3:26:31<3:23:46, 49.89it/s]global step 390000, trans_decision ep_re 1602.707212302712

{"global_step": 390000, "eval_re": [3307.7721384516235, 916.9081384293705, 
139.65128494985555, 915.6900622945933, 2627.085892125276, 517.9072157384485, 
2901.215207695326, 3382.61149752129, 736.3397841244362, 581.8909016969001], 
"eval_len": [1000, 1000, 55, 1000, 773, 1000, 1000, 1000, 244, 275]}

 40%|███▉      | 399997/1000000 [3:32:01<4:32:26, 36.71it/s]global step 400000, trans_decision ep_re 2248.9293984722744

{"global_step": 400000, "eval_re": [1902.0731576933515, 1451.3275639581325, 
2668.4142313720395, 2009.0213638903972, 3386.0396884665265, 2135.6887079110384, 
1919.037196367508, 2168.3987494422327, 3311.4806223996106, 1537.8127032219145], 
"eval_len": [755, 508, 861, 1000, 1000, 595, 573, 758, 1000, 542]}

 41%|████      | 409997/1000000 [3:37:21<4:30:23, 36.37it/s]global step 410000, trans_decision ep_re 2361.6529347221085

{"global_step": 410000, "eval_re": [2500.8950003212353, 1154.216003215923, 
1078.8226224423518, 3199.5973191732946, 1011.459609979956, 2246.549373713851, 
2823.5306059265395, 3235.2803425376733, 3401.513514331751, 2964.6649555785057], 
"eval_len": [769, 359, 286, 1000, 335, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419999/1000000 [3:42:21<3:13:12, 50.03it/s]global step 420000, trans_decision ep_re 2310.966708641153

{"global_step": 420000, "eval_re": [1263.0432774444434, 1512.0816160164675, 
3192.3513831289024, 2556.8321576440176, 1469.3626366190294, 2802.312729800143, 
2828.5318405243306, 3266.3621233252775, 1462.0139413750912, 2756.775380533829], 
"eval_len": [479, 449, 1000, 705, 484, 1000, 1000, 1000, 474, 1000]}

 43%|████▎     | 429995/1000000 [3:47:11<3:10:50, 49.78it/s]global step 430000, trans_decision ep_re 2088.200386308816

{"global_step": 430000, "eval_re": [1816.6093021607749, 163.95870583683197, 
3244.506952346903, 3506.549827506992, 3488.1657531413525, 934.3739123778288, 
1007.5952209770796, 3228.543911373156, 987.0742681625169, 2504.6260092047264], 
"eval_len": [1000, 87, 1000, 1000, 1000, 295, 1000, 1000, 343, 1000]}

 44%|████▍     | 439995/1000000 [3:51:51<3:07:57, 49.66it/s]global step 440000, trans_decision ep_re -156.3815234479464

{"global_step": 440000, "eval_re": [28.983565246337232, -860.9072780386125, 
-654.8030513281601, 62.51496351909538, -33.85713347919029, 9.856038929587173, 
18.99654546092745, 28.03562898300209, 58.571110786197345, -221.20562455864783], 
"eval_len": [45, 1000, 1000, 88, 46, 18, 25, 29, 38, 1000]}

 45%|████▍     | 449995/1000000 [3:56:31<3:04:40, 49.64it/s]global step 450000, trans_decision ep_re 1561.2362347467356

{"global_step": 450000, "eval_re": [2060.0357182611133, 3181.38703363793, 
-114.9926712904696, 2063.2584238914646, 436.14231100008095, 2407.690447943983, 
2568.220567939039, 328.33262589883697, 2267.778506264748, 414.5093839206273], 
"eval_len": [720, 1000, 1000, 835, 182, 1000, 904, 1000, 1000, 286]}

 46%|████▌     | 459999/1000000 [4:01:11<3:00:35, 49.84it/s]global step 460000, trans_decision ep_re 2748.6188190062726

{"global_step": 460000, "eval_re": [2881.51485126411, 2723.93045070569, 
2915.564040836221, 2686.978990462681, 2974.43188791309, 2892.738721668917, 
2529.1475953855206, 2582.185574361126, 2752.712507724329, 2546.9835697410426], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469997/1000000 [4:06:01<2:57:03, 49.89it/s]global step 470000, trans_decision ep_re 2639.0889759822826

{"global_step": 470000, "eval_re": [2709.9217877607907, 3221.756947225048, 
2365.1971987625693, 2870.4830692689807, 2111.456310273693, 2881.002654855979, 
2898.01966372319, 2740.5690658302246, 2159.716589793948, 2432.7664723284006], 
"eval_len": [1000, 1000, 1000, 1000, 798, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479997/1000000 [4:10:51<2:53:37, 49.92it/s]global step 480000, trans_decision ep_re 2036.3222351660836

{"global_step": 480000, "eval_re": [2450.4182136060963, 270.58566041896347, 
2400.1741530108666, 1366.9120915682527, 2350.6453156761454, 2432.5062926888445, 
2861.722611538884, 2078.84220296239, 2071.3885958715437, 2080.02721431885], 
"eval_len": [1000, 112, 1000, 587, 1000, 1000, 1000, 831, 1000, 721]}

 49%|████▉     | 489995/1000000 [4:15:41<2:49:57, 50.01it/s]global step 490000, trans_decision ep_re 1883.0111276499815

{"global_step": 490000, "eval_re": [2710.494470376787, 1635.9531248455392, 
2981.668126487149, 2846.003675034202, 2866.219141825984, 431.3159775841284, 
1384.2878314297566, 728.6594602338101, 2606.221576897209, 639.2878917852452], 
"eval_len": [1000, 601, 1000, 1000, 1000, 160, 479, 232, 1000, 290]}

 50%|████▉     | 499997/1000000 [4:20:21<2:47:34, 49.73it/s]global step 500000, trans_decision ep_re 2334.112658903894

{"global_step": 500000, "eval_re": [2261.2599675276697, 2557.1528215748244, 
2307.9420604544375, 2649.974809279037, 2412.3080675149226, 2316.3821921806534, 
2374.9950328908144, 2682.5683086454023, 866.597577401083, 2911.945751570094], 
"eval_len": [1000, 1000, 1000, 1000, 848, 1000, 1000, 1000, 350, 1000]}

 51%|█████     | 509995/1000000 [4:25:11<2:44:19, 49.70it/s]global step 510000, trans_decision ep_re 2020.4950811364092

{"global_step": 510000, "eval_re": [2311.02123796255, 1478.3787133337087, 
799.3150332341434, 2645.7284626098567, 2862.073156340624, 2704.5497826715005, 
-291.9686561549353, 2670.540701795477, 3031.647495447938, 1993.6648841232325], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 747]}

 52%|█████▏    | 519999/1000000 [4:30:01<2:40:23, 49.88it/s]global step 520000, trans_decision ep_re 1337.203957566716

{"global_step": 520000, "eval_re": [1994.38670469194, 1683.0530926920915, 
1973.5005288752973, 1291.428312997207, 1928.8442074542936, 20.249662093911454, 
1061.580960087316, 2131.3980539514164, 516.823987949699, 770.7740648739865], 
"eval_len": [1000, 1000, 1000, 1000, 871, 17, 1000, 1000, 328, 1000]}

 53%|█████▎    | 529995/1000000 [4:34:51<2:37:14, 49.82it/s]global step 530000, trans_decision ep_re 1815.1631624487286

{"global_step": 530000, "eval_re": [1859.6088558670467, 2992.1105628476566, 
3001.6118818202453, 1724.868765161434, 1534.6367567783386, 303.57088569858865, 
2685.5413484343285, 2396.3330511236577, 339.0341699767552, 1314.3153467792342], 
"eval_len": [645, 1000, 1000, 764, 1000, 131, 1000, 1000, 127, 485]}

 54%|█████▍    | 539997/1000000 [4:39:22<2:34:17, 49.69it/s]global step 540000, trans_decision ep_re 701.5441070503871

{"global_step": 540000, "eval_re": [1999.8332755611732, 603.7406905229611, 
377.6875147490965, 56.55746813523685, 149.88221482870722, 729.953846838828, 
355.19407603511394, 1121.187842008592, 684.3623030270428, 937.0418387971192], 
"eval_len": [868, 250, 211, 38, 82, 394, 114, 469, 339, 394]}

 55%|█████▍    | 549997/1000000 [4:44:11<2:33:05, 48.99it/s]global step 550000, trans_decision ep_re 1706.3052686118851

{"global_step": 550000, "eval_re": [2203.4206118636807, 322.4047947094105, 
1.7574018901666784, 2199.8081436244233, 2177.9884747781625, 828.7886968400891, 
1833.330835250247, 2176.395459378826, 2537.3148750519094, 2781.8433927319347], 
"eval_len": [1000, 234, 13, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559997/1000000 [4:49:01<2:27:34, 49.69it/s]global step 560000, trans_decision ep_re 2454.1146512264313

{"global_step": 560000, "eval_re": [2544.704452277256, 2442.0491956630226, 
2404.8179403164786, 2506.3678944875824, 2407.328118209725, 2351.67120240552, 
2345.489796433319, 2596.752316240643, 2607.4899945570432, 2334.4756016737224], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569999/1000000 [4:53:51<2:23:34, 49.92it/s]global step 570000, trans_decision ep_re 2002.4420193976741

{"global_step": 570000, "eval_re": [2536.961300424827, 2155.958392180727, 
484.62982200671263, 2449.9955200449294, 2590.179581803878, 741.701338593397, 
1357.2931334962832, 2505.4250159120397, 2637.54665626587, 2564.7294332480765], 
"eval_len": [1000, 842, 212, 1000, 1000, 1000, 494, 1000, 1000, 1000]}

 58%|█████▊    | 579997/1000000 [4:58:41<2:20:22, 49.87it/s]global step 580000, trans_decision ep_re 2649.9751125909834

{"global_step": 580000, "eval_re": [2593.538688954726, 2661.907749746135, 
2863.028416515527, 2698.979908350824, 2983.975803794295, 2295.6657278547614, 
2536.7295148084663, 3059.713882704928, 2423.0624866139265, 2383.1489465662457], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589999/1000000 [5:03:31<2:17:30, 49.69it/s]global step 590000, trans_decision ep_re 1233.7358749267494

{"global_step": 590000, "eval_re": [190.85568143621398, 1837.3330791122917, 
2553.1483193961203, 803.6729301924968, 774.4394740422557, 1799.8276252191522, 
2825.9718343642708, 172.25502773489708, 266.2814216420793, 1113.573356127715], 
"eval_len": [129, 1000, 1000, 1000, 1000, 1000, 1000, 70, 160, 398]}

 60%|█████▉    | 599996/1000000 [5:08:12<2:21:06, 47.24it/s]global step 600000, trans_decision ep_re 2520.661260184454

{"global_step": 600000, "eval_re": [2930.1715757325483, 3018.9878449145, 
2888.776647719816, 2479.8863244268277, 2495.395735394516, 2946.9361444714773, 
118.62629415919106, 2565.9511684680824, 3127.54993336924, 2634.330933188343], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 63, 1000, 1000, 1000]}

 61%|██████    | 609999/1000000 [5:13:02<2:09:27, 50.21it/s]global step 610000, trans_decision ep_re 1581.7309162341178

{"global_step": 610000, "eval_re": [1978.8458597114106, 1086.8577003864154, 
2490.568632944254, 240.51020974729332, 798.1831752709456, 975.0230844639843, 
2543.7170054861185, 2633.6852535250077, 1432.0478801744248, 1637.8703606313238],
"eval_len": [1000, 1000, 1000, 132, 1000, 419, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619997/1000000 [5:17:52<2:06:26, 50.09it/s]global step 620000, trans_decision ep_re 2778.287416646107

{"global_step": 620000, "eval_re": [2651.332586498914, 2756.5455412801916, 
2954.067944592964, 2737.000369210326, 1868.5998582800098, 3135.9187095672323, 
3037.566371021677, 2997.8582352911194, 2586.9491580632803, 3057.0353926553566], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629999/1000000 [5:22:42<2:03:28, 49.94it/s]global step 630000, trans_decision ep_re 1785.0955591776305

{"global_step": 630000, "eval_re": [1452.2868556384435, 2556.619862235535, 
2821.0453237825004, 1036.6630944183084, 999.5372447321806, 651.6166874172615, 
3265.0625869047935, 151.84581270016753, 3017.263456354736, 1899.0146675923802], 
"eval_len": [491, 1000, 1000, 387, 1000, 190, 1000, 60, 1000, 693]}

 64%|██████▍   | 639997/1000000 [5:27:22<2:01:16, 49.48it/s]global step 640000, trans_decision ep_re 3103.098957304847

{"global_step": 640000, "eval_re": [3332.712394832509, 3149.322081359895, 
2615.2511673447357, 3459.3520143352403, 2939.8530746502274, 3240.6479913276726, 
3049.3804729216718, 3004.8746045083503, 3400.493518480757, 2839.102253287408], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649999/1000000 [5:32:12<1:56:00, 50.29it/s]global step 650000, trans_decision ep_re 1863.0034946804626

{"global_step": 650000, "eval_re": [723.5386808360662, 3057.305809866133, 
2776.977732220751, 1267.4205981320135, 703.5525695334225, 3348.904366037369, 
535.9132967167977, 1574.2777272689752, 1629.3964406033103, 3012.747725589786], 
"eval_len": [229, 1000, 928, 1000, 1000, 1000, 1000, 491, 505, 1000]}

 66%|██████▌   | 659999/1000000 [5:36:52<1:52:43, 50.27it/s]global step 660000, trans_decision ep_re 2470.3646732294847

{"global_step": 660000, "eval_re": [3311.669538359519, 1808.45698533075, 
2951.963971477859, 3313.6980530433857, 476.099867180664, 2382.9617378686876, 
702.7818332715874, 3470.6087587533384, 3210.6616560384164, 3074.744330970641], 
"eval_len": [1000, 561, 1000, 1000, 171, 712, 232, 1000, 1000, 1000]}

 67%|██████▋   | 669999/1000000 [5:41:32<1:49:31, 50.21it/s]global step 670000, trans_decision ep_re 2789.055010345083

{"global_step": 670000, "eval_re": [3233.380106514705, 3275.0073635029294, 
3175.116092359572, 1899.64246573388, 2608.6376369790223, 2932.9303613993784, 
3289.9600641920506, 1592.2107162275972, 2824.5373212680047, 3059.1279752736878],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679999/1000000 [5:46:22<1:45:59, 50.32it/s]global step 680000, trans_decision ep_re 2894.6585163224127

{"global_step": 680000, "eval_re": [3012.373523969871, 3124.1066612638983, 
807.3210932824164, 3087.543137020717, 3302.378070776832, 2522.7170517930463, 
3450.5925529642914, 3324.118017137632, 3170.446410449056, 3144.9886445663683], 
"eval_len": [1000, 1000, 265, 1000, 1000, 1000, 1000, 1000, 894, 1000]}

 69%|██████▉   | 689995/1000000 [5:51:12<1:42:58, 50.17it/s]global step 690000, trans_decision ep_re 3023.070826398149

{"global_step": 690000, "eval_re": [3244.055964536067, 2174.8996676948404, 
2988.360022913794, 2845.9554134905416, 3099.1347410648273, 3305.9110718541424, 
3215.662831479358, 3038.724026870115, 3121.487037726162, 3196.5174863516445], 
"eval_len": [1000, 806, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699995/1000000 [5:56:02<1:39:22, 50.31it/s]global step 700000, trans_decision ep_re 2287.2582060598247

{"global_step": 700000, "eval_re": [2621.4443507311553, 22.048843662958472, 
2697.845503514024, 2852.551535939532, 2717.8193403788314, 2218.407750717098, 
2207.237945721683, 2814.2061357923462, 2050.3640264501482, 2670.656627690471], 
"eval_len": [1000, 31, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709995/1000000 [6:00:52<1:36:27, 50.11it/s]global step 710000, trans_decision ep_re 1893.8942666747248

{"global_step": 710000, "eval_re": [2874.0405969778963, 2915.641066764144, 
1548.0201511348735, 2019.7205935101672, 471.69946200150713, 2909.5282767710087, 
1959.9140227275334, 795.2101788837844, 2031.5476717854674, 1413.6206461908666], 
"eval_len": [1000, 1000, 612, 638, 238, 1000, 840, 1000, 697, 1000]}

 72%|███████▏  | 719997/1000000 [6:05:32<1:33:11, 50.07it/s]global step 720000, trans_decision ep_re 773.805214224356

{"global_step": 720000, "eval_re": [1185.208359479683, 72.34971066799741, 
687.6818024700151, 685.2215503223624, 211.34134253575922, 137.05630668037804, 
1557.1160825170882, 1433.2374703508412, 1130.8533994752738, 637.9861177441612], 
"eval_len": [421, 95, 1000, 301, 81, 59, 685, 499, 1000, 203]}

 73%|███████▎  | 729995/1000000 [6:10:12<1:29:34, 50.24it/s]global step 730000, trans_decision ep_re 2533.0786302332526

{"global_step": 730000, "eval_re": [1338.9058405285218, 3028.21317034, 
2583.702750897758, 3236.778051907862, 3160.7971578454885, 3287.570504581671, 
1378.8917531676866, 1349.7849715712275, 2539.865973459556, 3426.2761280327536], 
"eval_len": [1000, 1000, 878, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739995/1000000 [6:15:02<1:26:18, 50.21it/s]global step 740000, trans_decision ep_re 1755.0271427579914

{"global_step": 740000, "eval_re": [2114.7246282276333, 2713.713164958419, 
2765.1677308357266, 707.2319088998029, 2170.8859616588875, 3007.530021658049, 
889.6039148814419, 1430.2899310673, 1199.6158958087633, 551.5082695838919], 
"eval_len": [1000, 1000, 1000, 326, 1000, 1000, 300, 1000, 414, 1000]}

 75%|███████▍  | 749997/1000000 [6:19:42<1:22:51, 50.29it/s]global step 750000, trans_decision ep_re 2632.746299408346

{"global_step": 750000, "eval_re": [3043.367663923304, 2295.4197528931795, 
2805.0115980907476, 2918.8350861759086, 3139.00573837608, 2585.260318520844, 
1301.817919875073, 2829.120858957617, 2402.1484957273074, 3007.475561543402], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 480, 1000, 1000, 1000]}

 76%|███████▌  | 759999/1000000 [6:24:32<1:19:52, 50.08it/s]global step 760000, trans_decision ep_re 1387.1347418440778

{"global_step": 760000, "eval_re": [3084.0453789901244, 72.38046923599973, 
600.5722149522593, 2458.260835337035, 550.0826590634884, 1058.5353847284314, 
1612.0516171485335, 1806.5636681600063, 2021.913312414149, 606.9418784107497], 
"eval_len": [1000, 1000, 329, 724, 162, 1000, 503, 647, 665, 207]}

 77%|███████▋  | 769999/1000000 [6:29:12<1:16:16, 50.26it/s]global step 770000, trans_decision ep_re 1465.8613908780505

{"global_step": 770000, "eval_re": [2791.327308013351, 2258.363563555521, 
653.913663707998, 1032.9086305714848, 3082.2882906296904, 376.59427075594994, 
159.9494822703536, 809.5177712354348, 2733.995073189743, 759.7558548509805], 
"eval_len": [892, 1000, 188, 365, 1000, 165, 87, 362, 905, 253]}

 78%|███████▊  | 779997/1000000 [6:33:42<1:12:57, 50.25it/s]global step 780000, trans_decision ep_re 2342.94213520971

{"global_step": 780000, "eval_re": [3064.5104540322864, 950.7039795392739, 
1602.954830086597, 2849.618650941546, 829.8663321238763, 3327.897776033492, 
2675.8722756575085, 2964.5144171125116, 2473.9862726308806, 2689.4963639391267],
"eval_len": [1000, 281, 1000, 1000, 277, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789995/1000000 [6:38:32<1:09:31, 50.34it/s]global step 790000, trans_decision ep_re 2909.9918432185286

{"global_step": 790000, "eval_re": [3229.252371262464, 2017.1707034955896, 
3129.571765378023, 3135.566086373755, 3045.09224763986, 2993.9422682637232, 
3216.5698156655735, 3102.1994748767916, 2613.76751404067, 2616.7861851888356], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799995/1000000 [6:43:04<1:06:27, 50.15it/s]global step 800000, trans_decision ep_re 929.3115847407983

{"global_step": 800000, "eval_re": [125.41767042131194, 961.8731505853511, 
838.2099777486679, 1031.0530036819266, 464.7336973532275, 913.3878999986997, 
663.926813216888, 1603.4240594974076, 393.1034961412791, 2297.9860787632215], 
"eval_len": [48, 415, 358, 465, 157, 390, 249, 730, 141, 844]}

 81%|████████  | 809997/1000000 [6:47:52<1:03:29, 49.87it/s]global step 810000, trans_decision ep_re 786.1054483042146

{"global_step": 810000, "eval_re": [9.537941752357924, -506.54248772253067, 
8.684916416369717, 729.4014495046425, 446.5038667134219, 174.21754046865811, 
1899.403834512853, 968.2350774250033, 2093.7238227736807, 2037.8885211976906], 
"eval_len": [18, 1000, 24, 289, 1000, 227, 668, 1000, 1000, 1000]}

 82%|████████▏ | 819999/1000000 [6:52:32<1:00:01, 49.98it/s]global step 820000, trans_decision ep_re 1087.0928244537122

{"global_step": 820000, "eval_re": [2626.093763191454, 2574.802167461327, 
401.89041366540135, 392.8087866003329, 271.90804023595814, 1088.5485428859292, 
1118.8195691257833, 249.6413235467261, 1063.4954059496115, 1082.920231874595], 
"eval_len": [1000, 1000, 169, 131, 171, 1000, 502, 88, 434, 1000]}

 83%|████████▎ | 829997/1000000 [6:57:02<56:16, 50.35it/s]global step 830000, trans_decision ep_re 2529.60845980963

{"global_step": 830000, "eval_re": [2698.2993966854324, 2502.8837615818443, 
2480.7328250588057, 2737.2758806196985, 2578.60137773679, 2411.123532914707, 
2678.6070050189473, 2324.3057237243897, 2562.789847405071, 2321.465247350613], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839999/1000000 [7:01:52<53:01, 50.30it/s]global step 840000, trans_decision ep_re 1292.9903196724156

{"global_step": 840000, "eval_re": [45.85237631441721, 819.7236671438092, 
2570.5977697487765, 90.17577622543752, 368.25621256649487, 2607.257559667797, 
1692.3211484156063, 764.5125115789998, 2703.192736432241, 1268.0134386305751], 
"eval_len": [27, 627, 1000, 48, 244, 1000, 639, 291, 1000, 1000]}

 85%|████████▍ | 849995/1000000 [7:06:32<49:47, 50.20it/s]global step 850000, trans_decision ep_re 1202.525132732302

{"global_step": 850000, "eval_re": [1437.172560172749, 282.80956061786486, 
1658.560150737845, 635.1288168482193, 2702.952025672207, 2305.798594280602, 
270.8571350286273, 918.6218002708738, 610.4199382333725, 1202.9307454606592], 
"eval_len": [609, 121, 527, 223, 1000, 1000, 118, 1000, 254, 466]}

 86%|████████▌ | 859995/1000000 [7:11:02<46:32, 50.14it/s]global step 860000, trans_decision ep_re 2205.1139990721763

{"global_step": 860000, "eval_re": [2695.6223379508237, 2900.7256619446466, 
3124.396057824577, 2717.1264722949, 919.615559751334, 2136.930708523736, 
2970.6211360121047, 2645.071013739909, 851.8771917991113, 1089.153850880621], 
"eval_len": [1000, 1000, 1000, 1000, 315, 1000, 1000, 1000, 1000, 458]}

 87%|████████▋ | 869997/1000000 [7:15:52<43:20, 49.99it/s]global step 870000, trans_decision ep_re 1061.2654996311144

{"global_step": 870000, "eval_re": [1711.1977509369092, 2496.815883765869, 
1310.072421604618, 988.9908866395928, 1137.9248635615, 1084.1776481702666, 
-59.38487635439043, 956.165541687961, 53.68629265999717, 933.0085836388184], 
"eval_len": [853, 1000, 739, 1000, 393, 530, 215, 1000, 30, 408]}

 88%|████████▊ | 879999/1000000 [7:20:32<39:50, 50.20it/s]global step 880000, trans_decision ep_re 1633.9699127791032

{"global_step": 880000, "eval_re": [257.41978866095377, 2359.1675863369237, 
2736.1196779064685, 1740.5462538273684, 417.05951344161144, 1853.3377995888682, 
2046.0154752120452, 3006.088136764324, 707.0955479556494, 1216.8493480968202], 
"eval_len": [93, 868, 1000, 672, 127, 844, 733, 1000, 1000, 449]}

 89%|████████▉ | 889999/1000000 [7:25:12<36:27, 50.29it/s]global step 890000, trans_decision ep_re 1392.6412811897412

{"global_step": 890000, "eval_re": [497.75287459591266, 1960.7226613094792, 
2210.3162404390755, 1113.7545459175428, 214.6185599395694, 1542.9243672013235, 
2501.1639537593487, 2035.0621067006357, 1753.7315851411115, 96.36591689341267], 
"eval_len": [224, 1000, 1000, 645, 145, 635, 1000, 991, 1000, 84]}

 90%|████████▉ | 899999/1000000 [7:29:52<32:57, 50.57it/s]global step 900000, trans_decision ep_re 1595.2829394074865

{"global_step": 900000, "eval_re": [628.4259698668714, 1567.8628843743058, 
1093.5414169603225, 522.1949092132611, 822.57292058576, 2616.082584714329, 
2489.7002199750823, 1130.2643061162178, 2556.232772036227, 2525.951410232491], 
"eval_len": [277, 1000, 427, 259, 366, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909999/1000000 [7:34:32<29:56, 50.10it/s]global step 910000, trans_decision ep_re 2324.011292817993

{"global_step": 910000, "eval_re": [2588.486625764237, 3254.141185436281, 
1331.1049547709652, 3058.401793008382, 748.47760123646, 308.80245874111455, 
2399.0907911122113, 3220.8723103959487, 3207.394584083909, 3123.34062363042], 
"eval_len": [1000, 1000, 400, 1000, 1000, 158, 894, 1000, 1000, 1000]}

 92%|█████████▏| 919997/1000000 [7:39:12<26:39, 50.02it/s]global step 920000, trans_decision ep_re 1614.6178228337828

{"global_step": 920000, "eval_re": [1180.5444314025763, 2128.19129784943, 
376.0260704316242, 2781.7074037063035, 2093.8797664230738, 2267.856654595571, 
1101.1107037116738, 2464.879615976869, 1022.614429722268, 729.3678545184417], 
"eval_len": [387, 639, 140, 1000, 803, 1000, 519, 1000, 396, 230]}

 93%|█████████▎| 929995/1000000 [7:43:52<23:24, 49.84it/s]global step 930000, trans_decision ep_re 2505.4871880023097

{"global_step": 930000, "eval_re": [3201.624826096786, 1883.565696050881, 
2825.297795821273, 2771.362300786883, 2633.362896701847, 2774.317206627303, 
2793.9190982508503, 1182.3197848041493, 2532.16297205219, 2456.939302830935], 
"eval_len": [1000, 730, 1000, 917, 920, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939997/1000000 [7:48:42<20:10, 49.55it/s]global step 940000, trans_decision ep_re 1744.4003348875128

{"global_step": 940000, "eval_re": [2386.6159429608397, 1004.9879056085849, 
3185.105867383351, 605.4275899885972, 1007.887253003493, 1884.5320211751393, 
3065.590440724966, 2836.498394320607, 994.3663780221542, 472.9915556873959], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 147]}

 95%|█████████▍| 949999/1000000 [7:53:32<16:43, 49.84it/s]global step 950000, trans_decision ep_re 1972.2999006937266

{"global_step": 950000, "eval_re": [3455.9664265320516, 1396.6963600440279, 
370.5292607939964, 1757.571698137476, 3252.564963783072, 3015.11415244718, 
1565.4880340126708, 2496.746842145996, 804.5678274606772, 1607.7534415801174], 
"eval_len": [1000, 391, 105, 1000, 1000, 1000, 1000, 1000, 274, 466]}

 96%|█████████▌| 959999/1000000 [7:58:22<13:23, 49.78it/s]global step 960000, trans_decision ep_re 1244.14201620291

{"global_step": 960000, "eval_re": [584.7756567879073, 2118.8196185108363, 
451.66629908316395, 480.99068949199716, 698.7650267189184, 3025.3828111674766, 
21.774988607747648, 3023.2631980214023, 1866.1170558203642, 169.8648178192844], 
"eval_len": [230, 1000, 207, 173, 280, 1000, 18, 888, 1000, 73]}

 97%|█████████▋| 969999/1000000 [8:03:02<10:03, 49.70it/s]global step 970000, trans_decision ep_re 1676.507495896408

{"global_step": 970000, "eval_re": [291.9636837308537, 1153.0759313292176, 
1855.2582253580242, 820.7871158898212, 2912.3732861600915, 2716.8838079557318, 
1798.722182961872, 1075.9842263409198, 1028.842049361487, 3111.1844498760624], 
"eval_len": [128, 409, 710, 1000, 1000, 1000, 570, 1000, 444, 1000]}

 98%|█████████▊| 979995/1000000 [8:07:42<06:41, 49.80it/s]global step 980000, trans_decision ep_re 1615.1137396041322

{"global_step": 980000, "eval_re": [1539.8293667249486, 1038.5600970434393, 
1380.969089451543, 2414.8030628394945, 660.1067451161204, 2698.210089718628, 
396.09070406793415, 1936.7946111763642, 2228.7881871164104, 1856.9854427864402],
"eval_len": [555, 406, 602, 991, 264, 1000, 218, 590, 813, 640]}

 99%|█████████▉| 989995/1000000 [8:12:22<03:21, 49.70it/s]global step 990000, trans_decision ep_re 2012.4845207922979

{"global_step": 990000, "eval_re": [1576.4182707996813, 3643.654675414976, 
2906.0678154550433, 1369.739382547306, 1620.014007292239, 1259.7585215865386, 
130.9783118627158, 1844.4587995853776, 2798.5956010814716, 2975.1598222976277], 
"eval_len": [512, 1000, 1000, 429, 1000, 1000, 48, 551, 1000, 1000]}

100%|█████████▉| 999997/1000000 [8:17:12<00:00, 49.48it/s]global step 1000000, trans_decision ep_re 2310.8089768668497

{"global_step": 1000000, "eval_re": [3215.282920216845, 3171.614756221347, 
2640.9989665330995, 3148.0916987981127, 3363.6452185871485, 3215.0958962206846, 
961.2286355948762, 644.1995667647874, -6.768052674265643, 2754.700162405861], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 307, 248, 10, 1000]}

100%|██████████| 1000000/1000000 [8:17:21<00:00, 33.51it/s]
