
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)'
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9996/1000000 [03:20<6:31:04, 42.19it/s]global step 10000, trans_decision ep_re -160.47242194275208

{"global_step": 10000, "eval_re": [-228.64714078911655, -57.526981207939635, 
-68.0714118250387, -222.77344797363043, -221.1672828250802, -43.803270136203665,
-44.416827123050176, -278.72451187481465, -219.99528931825068, 
-219.5980563543963], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

  2%|▏         | 19996/1000000 [09:30<7:45:41, 35.07it/s]global step 20000, trans_decision ep_re 804.5325727244738

{"global_step": 20000, "eval_re": [824.9120953175947, 600.291878302157, 
404.6450501188115, 1266.7662175756816, 1198.5914391200618, 860.4410572849395, 
662.7084149275532, 914.4383248595876, 811.7572435063723, 500.7740062319778], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29995/1000000 [15:50<6:28:55, 41.57it/s]global step 30000, trans_decision ep_re 1976.307203894265

{"global_step": 30000, "eval_re": [2401.950753725371, 2645.004098353838, 
3079.97274447114, 1156.0472794002962, 1789.4950899978169, 861.4661755802294, 
2459.4258982073293, 2087.4415925596636, 434.5458941181131, 2847.7225125288514], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39997/1000000 [22:10<6:27:10, 41.32it/s]global step 40000, trans_decision ep_re 1322.8714177413792

{"global_step": 40000, "eval_re": [1260.649840587595, 1771.7145226288842, 
1907.7974209018716, 1112.4218200606863, 938.82800139448, 2183.8320245988148, 
1769.8914378824238, 784.2416833107657, 752.2690106123188, 747.0684154359521], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49995/1000000 [28:30<5:56:48, 44.38it/s]global step 50000, trans_decision ep_re 1400.3630155757921

{"global_step": 50000, "eval_re": [412.26278764089784, 1635.0930938401739, 
1297.9773266837074, 1426.909368744387, 880.3934791255629, 2997.9604621631784, 
1197.073749129007, 958.5792004980635, 2435.686123349563, 761.6945645833802], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59999/1000000 [34:30<5:47:32, 45.08it/s]global step 60000, trans_decision ep_re 2459.9024694539194

{"global_step": 60000, "eval_re": [1161.9445933621148, 4136.316881991471, 
3970.5500668366885, 4063.9890812895824, 883.3478396228531, 1439.68748170022, 
2727.1926014455057, 1263.6916140131511, 979.254475700355, 3973.0500585772497], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69999/1000000 [40:40<6:21:01, 40.68it/s]global step 70000, trans_decision ep_re 1787.4430641941526

{"global_step": 70000, "eval_re": [1875.4381132474052, 1756.965972030348, 
4073.6606607443587, 1405.0435683528372, 2108.47706254055, 974.0310190975598, 
1088.0537727163028, 1175.847911338565, 1195.598353876716, 2221.3142079968825], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79998/1000000 [47:00<6:12:22, 41.18it/s]global step 80000, trans_decision ep_re 2874.732035209348

{"global_step": 80000, "eval_re": [4078.4641047216137, 2651.639062734414, 
2767.3696326382287, 2250.3266243691996, 4177.517426063149, 3513.2054785980254, 
3060.768569563582, 1597.7203796042718, 2391.549374495003, 2258.759699305991], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89995/1000000 [53:10<6:00:19, 42.09it/s]global step 90000, trans_decision ep_re 2290.0805842770346

{"global_step": 90000, "eval_re": [1495.9301969398891, 1064.982185067084, 
3938.549751445109, 3765.8081765463457, 3485.8375986795363, 1054.8925814207535, 
1982.8898813141207, 1921.2881117764468, 2463.49557411625, 1727.1317854648112], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99997/1000000 [59:00<5:53:18, 42.46it/s]global step 100000, trans_decision ep_re 2177.2485886689233

{"global_step": 100000, "eval_re": [3534.642201265033, 1867.1782193720985, 
1292.2252969811448, 1745.5902269748574, 3305.2164412261313, 2762.7828663044584, 
1022.5608616671985, 1659.6431529577985, 4161.590546967078, 421.056072973436], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109995/1000000 [1:04:50<5:48:58, 42.50it/s]global step 110000, trans_decision ep_re 2370.9616192410876

{"global_step": 110000, "eval_re": [1854.5516128365168, 2575.1904919603253, 
2716.977982244457, 1363.9802448699247, 1423.0745924231821, 4009.810885689281, 
4232.567486340553, 2205.857904918089, 2295.331688359372, 1032.273302769176], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119995/1000000 [1:10:30<5:45:53, 42.40it/s]global step 120000, trans_decision ep_re 3639.231164757708

{"global_step": 120000, "eval_re": [4687.584611952122, 4037.616574111417, 
2776.304158062378, 3736.4367323634588, 3743.647987647256, 1283.1579920866754, 
4274.395790961935, 4646.309773129469, 3753.592723099998, 3453.2653041623744], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129995/1000000 [1:16:20<5:44:23, 42.10it/s]global step 130000, trans_decision ep_re 2245.7698127736226

{"global_step": 130000, "eval_re": [1225.1569571803532, 1150.4713990790276, 
3875.4476327608254, 1439.8635401242682, 1549.6538631876097, 4263.594669425291, 
1243.5994311517704, 1920.4911653622523, 2248.4926196370784, 3540.926849827749], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139999/1000000 [1:22:10<5:37:44, 42.44it/s]global step 140000, trans_decision ep_re 2983.527077280966

{"global_step": 140000, "eval_re": [3796.96283256648, 2330.3524652795754, 
4704.493165865165, 2862.3780194497544, 3792.518523388033, 3655.753724500677, 
1816.0198677024043, 1111.0703787333905, 3989.0965946543674, 1776.6252006698119],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149995/1000000 [1:27:50<5:33:40, 42.46it/s]global step 150000, trans_decision ep_re 2512.445704540466

{"global_step": 150000, "eval_re": [2158.3764869352967, 2217.9346175834226, 
1980.823062112872, 3236.452239832578, 4332.205403913855, 1467.5811966091012, 
1242.5515145593733, 2454.97851907687, 3372.1091139903906, 2661.4448907908977], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159999/1000000 [1:33:40<5:29:50, 42.44it/s]global step 160000, trans_decision ep_re 2325.269978025616

{"global_step": 160000, "eval_re": [1885.2125351774814, 2958.767181503541, 
1301.2745948776596, 1732.5658415895448, 3902.1017822020667, 2735.3263483318656, 
2370.33593438669, 2086.2103411774683, 2347.652438965633, 1933.2527820442126], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169995/1000000 [1:39:20<5:24:36, 42.61it/s]global step 170000, trans_decision ep_re 2464.576179743056

{"global_step": 170000, "eval_re": [3821.158356039109, 2986.967817530234, 
1697.7273952727444, 3019.3304637431343, 2059.7678898325, 2646.326253264733, 
2992.9328404901567, 2058.470667435586, 1646.684868769634, 1716.395245052732], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179997/1000000 [1:45:10<5:22:32, 42.37it/s]global step 180000, trans_decision ep_re 2560.886591953792

{"global_step": 180000, "eval_re": [3005.4333648971688, 2514.7324072561387, 
2084.2742342952024, 1672.9624023955607, 2991.7682282861574, 3107.8042284773633, 
1500.133000624262, 1766.5507072782586, 3264.786688204488, 3700.420657823326], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189995/1000000 [1:50:50<5:19:02, 42.31it/s]global step 190000, trans_decision ep_re 3062.416957751779

{"global_step": 190000, "eval_re": [3249.004564578422, 3042.4500007619076, 
3036.0853403645847, 3141.0654069758434, 2886.2842835839897, 3026.5622188433636, 
3153.5282506393196, 3085.1766837335967, 2852.0380407533135, 3151.9747872834523],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199995/1000000 [1:56:40<5:15:11, 42.30it/s]global step 200000, trans_decision ep_re 3557.1847651421435

{"global_step": 200000, "eval_re": [4804.486267782033, 4487.097557353688, 
2606.392108637532, 4072.9725461378766, 3993.121963842854, 4489.746236203075, 
2855.115761935647, 2937.121839349615, 3183.597426252797, 2142.1959439263183], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209995/1000000 [2:02:20<5:09:24, 42.55it/s]global step 210000, trans_decision ep_re 3402.2865610477857

{"global_step": 210000, "eval_re": [4080.434217662769, 3364.4892767105453, 
3823.733706319558, 3975.349687095264, 2269.249713489411, 3008.281137878481, 
3883.7047113252743, 2761.043621557327, 3519.7051870115656, 3336.8743514276653], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219996/1000000 [2:08:20<6:33:09, 33.07it/s]global step 220000, trans_decision ep_re 3171.669328761999

{"global_step": 220000, "eval_re": [1501.936821096795, 3688.7148272182058, 
4043.52489360728, 4071.572780602369, 3377.475921856009, 3615.784548712948, 
3856.15725734494, 4016.2124083115878, 1439.166289536532, 2106.147539333322], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229998/1000000 [2:14:20<6:10:29, 34.64it/s]global step 230000, trans_decision ep_re 3050.1779285893253

{"global_step": 230000, "eval_re": [4342.232694803363, 4043.8240530334665, 
2551.6361333727746, 1997.0473430664115, 4175.900141769297, 1861.3512209545452, 
2903.5764074618937, 3843.316366640673, 2927.0030615873, 1855.8918632035281], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239996/1000000 [2:20:20<6:01:39, 35.02it/s]global step 240000, trans_decision ep_re 1656.0526246262912

{"global_step": 240000, "eval_re": [2529.7218466044797, 380.5955866296791, 
346.2343638290814, 1959.8341640091303, 1365.2057896316646, 1408.3852921854761, 
2708.0994852883673, 2388.633639179641, 2016.1286340051245, 1457.6874449002655], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249999/1000000 [2:26:20<6:17:30, 33.11it/s]global step 250000, trans_decision ep_re 2694.705745942944

{"global_step": 250000, "eval_re": [3989.6121663066833, 1507.0992997881058, 
4242.191282127404, 2887.5900458173137, 4273.319813384139, 1771.352578587521, 
1840.5479880413925, 1643.825957071834, 2967.2645464947304, 1824.2537818103124], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259998/1000000 [2:32:00<4:49:08, 42.66it/s]global step 260000, trans_decision ep_re 2818.2759838704605

{"global_step": 260000, "eval_re": [2631.0945670140645, 2029.823616212103, 
3414.643389844026, 3054.9100608596996, 3155.3113604808395, 1208.6029543044813, 
3604.4652979307116, 4011.800517546591, 2535.793225202098, 2536.3148493099893], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269995/1000000 [2:37:40<4:34:05, 44.39it/s]global step 270000, trans_decision ep_re 3335.8174154105327

{"global_step": 270000, "eval_re": [3392.888842688096, 3274.403341793187, 
3350.4829547072836, 3417.5946669478535, 3194.5025075693366, 3420.638789577696, 
3371.2956745964293, 3196.6911887307187, 3416.2172463108523, 3323.458941183874], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279995/1000000 [2:43:11<4:29:37, 44.51it/s]global step 280000, trans_decision ep_re 2776.9655339819287

{"global_step": 280000, "eval_re": [2888.1572596395536, 1710.856746775291, 
1832.8212574351044, 1909.9307367166089, 4182.254329665333, 2824.2170074488404, 
2658.536465926661, 4149.713666269666, 1393.9183568968554, 4219.249513045375], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289997/1000000 [2:48:51<4:51:54, 40.54it/s]global step 290000, trans_decision ep_re 2025.8092800521747

{"global_step": 290000, "eval_re": [1506.0407520290844, 2304.3957397817926, 
2168.7117491940367, 4321.535747109174, 2144.7746332522606, 1722.1407308507303, 
1768.4654711836415, 1278.9586823133568, 1557.5409418878444, 1485.5283529198289],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299996/1000000 [2:54:31<4:18:39, 45.10it/s]global step 300000, trans_decision ep_re 3413.8935775949185

{"global_step": 300000, "eval_re": [2694.350264405347, 2764.131751791964, 
4191.302404564654, 2059.812791510681, 3628.151799648946, 2694.8419292575672, 
4229.0062794358955, 3936.8858857607665, 3412.9119201088174, 4527.540749464543], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309995/1000000 [3:00:21<4:31:47, 42.31it/s]global step 310000, trans_decision ep_re 2404.3466630193357

{"global_step": 310000, "eval_re": [1812.6575889696762, 1458.4972536017754, 
2236.6542040781287, 2176.0031475483406, 3060.6738602817086, 3505.55608951955, 
1570.6488444283652, 2135.0957669354098, 3218.624448284436, 2869.0554265459696], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319998/1000000 [3:06:51<5:25:20, 34.84it/s]global step 320000, trans_decision ep_re 1941.7692967924645

{"global_step": 320000, "eval_re": [2336.7268991392807, 1538.913952644008, 
2162.3863410454896, 1994.5047033021383, 1532.0993295383773, 1696.912910896304, 
1712.9205693180515, 2492.074607222398, 2610.982700187173, 1340.1709546314216], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329995/1000000 [3:13:11<4:25:29, 42.06it/s]global step 330000, trans_decision ep_re 2207.716658282465

{"global_step": 330000, "eval_re": [1468.3340243056014, 3267.5976067254073, 
1398.4970445624642, 1752.9377324207753, 2139.4717405338956, 1726.0032330035588, 
2478.6535925534185, 2021.690941154023, 1872.787054457002, 3951.193613108502], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339999/1000000 [3:19:11<6:40:13, 27.48it/s]global step 340000, trans_decision ep_re 3165.033184984547

{"global_step": 340000, "eval_re": [3402.725971615297, 3461.533899457731, 
3151.6812367933144, 3427.1246529591717, 3301.7256203060324, 3336.981527504634, 
1364.059439791145, 3396.9680035885417, 3401.1585286853924, 3406.372969144214], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349998/1000000 [3:25:11<4:22:42, 41.24it/s]global step 350000, trans_decision ep_re 2109.908628636471

{"global_step": 350000, "eval_re": [1789.1566222562005, 1624.7672225880253, 
2068.817697623358, 1720.0776125971904, 2468.5730991903206, 2681.8489145622552, 
1644.51440745406, 2554.011637559701, 1549.185974051006, 2998.1330984825954], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359997/1000000 [3:31:11<4:12:42, 42.21it/s]global step 360000, trans_decision ep_re 1735.8671515975952

{"global_step": 360000, "eval_re": [1653.075950479553, 1921.834242865208, 
1378.4843086697474, 1682.9037890910577, 2212.1429463352033, 1816.0185015635611, 
1558.2704510983394, 1483.7588052145315, 2146.0629282157997, 1506.1195924429487],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369997/1000000 [3:37:21<4:18:06, 40.68it/s]global step 370000, trans_decision ep_re 3831.9605219153746

{"global_step": 370000, "eval_re": [4350.82325936873, 4367.8807541512215, 
4488.006876860598, 4679.4108257238295, 4230.387064714649, 4057.019528892891, 
1746.0131782144715, 4499.138706628069, 2260.3117329691654, 3640.613291630119], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379999/1000000 [3:43:11<3:51:19, 44.67it/s]global step 380000, trans_decision ep_re 2940.783259473291

{"global_step": 380000, "eval_re": [2837.121897432558, 3275.75010041241, 
3290.6196912688138, 3124.253630261203, 2993.4127854417256, 3208.4994715657654, 
3217.2621364382057, 1365.4579455674689, 3090.5894558358464, 3004.865480508914], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389995/1000000 [3:48:42<3:47:47, 44.63it/s]global step 390000, trans_decision ep_re 2700.9746944752633

{"global_step": 390000, "eval_re": [2710.844668079235, 1676.3521372088444, 
2505.0777879107013, 3464.6432161642606, 2698.4312725082455, 1654.809903327207, 
3637.745399565704, 3384.3087176106433, 1757.2498418772802, 3520.284000500518], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399995/1000000 [3:54:02<3:44:25, 44.56it/s]global step 400000, trans_decision ep_re 2291.1814704999733

{"global_step": 400000, "eval_re": [1540.4121293792798, 3406.3495358824716, 
1656.9455636337248, 2002.820063713334, 4194.352129009202, 2053.201227886575, 
2342.7927443677836, 1777.213490528583, 2521.871106029917, 1415.8567145688664], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409995/1000000 [3:59:32<3:39:37, 44.77it/s]global step 410000, trans_decision ep_re 2302.152408417942

{"global_step": 410000, "eval_re": [1779.2273616858308, 1883.8216353039716, 
2950.4347925192815, 1630.95102419137, 3372.8837259456654, 1637.244840015655, 
1951.4815614164752, 2309.5556846632217, 2483.0238707578224, 3022.8995876801264],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419995/1000000 [4:04:52<3:36:40, 44.62it/s]global step 420000, trans_decision ep_re 2893.992180267459

{"global_step": 420000, "eval_re": [3762.0021745319495, 3653.1425949719182, 
3026.670081842633, 3325.8832241664386, 2456.5736775054356, 3375.76771366804, 
2654.752747639189, 1569.6799754230442, 3417.126915183441, 1698.322697742499], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429995/1000000 [4:10:22<3:32:55, 44.62it/s]global step 430000, trans_decision ep_re 1881.3669916745087

{"global_step": 430000, "eval_re": [2406.376432208234, 1532.7513479279164, 
1368.295726808445, 1798.5206431379736, 1807.3809899908326, 1612.22443285512, 
2310.6933291154837, 1966.9377739428726, 1463.8968243216198, 2546.592416436588], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439995/1000000 [4:15:52<3:28:49, 44.69it/s]global step 440000, trans_decision ep_re 2252.2707882192726

{"global_step": 440000, "eval_re": [2586.1942310150002, 2192.369013940923, 
1891.52020711422, 2367.09601033767, 3382.002643989358, 2524.0771378321715, 
1630.5932378360199, 2253.0761913571946, 2210.4225414576968, 1485.356667312468], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449995/1000000 [4:21:13<3:25:56, 44.51it/s]global step 450000, trans_decision ep_re 2664.389786354485

{"global_step": 450000, "eval_re": [1334.2539102975754, 2644.531924202356, 
2568.7331728965946, 3216.6529008993957, 3183.341269592082, 2769.966259360515, 
3283.5149768396072, 2888.4935030554184, 1584.684953030335, 3169.724993370972], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459995/1000000 [4:26:43<3:21:32, 44.66it/s]global step 460000, trans_decision ep_re 2506.9186006754085

{"global_step": 460000, "eval_re": [2242.383096779299, 2975.98845948546, 
2078.682651969432, 3179.212573779492, 2328.0906311421154, 3013.4100415186963, 
1652.900030448517, 3323.144683224245, 2872.5057613396893, 1402.8680770671378], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469995/1000000 [4:32:03<3:18:05, 44.59it/s]global step 470000, trans_decision ep_re 2270.332772781222

{"global_step": 470000, "eval_re": [3206.6874350382586, 3044.665080431931, 
1592.7876920664655, 1674.3511056449624, 1494.4288168378212, 3432.5465583636133, 
1999.4111441922337, 1880.398055076409, 2080.7720726263606, 2297.2797675341617], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479995/1000000 [4:37:33<3:14:29, 44.56it/s]global step 480000, trans_decision ep_re 1785.377323080549

{"global_step": 480000, "eval_re": [1515.1068621189524, 1927.5742980631767, 
1480.869251401844, 1588.0247126255567, 1763.9516211345501, 1646.5809807508708, 
2055.018649841288, 1789.0107694248024, 1874.5208663323886, 2213.115219112062], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489995/1000000 [4:42:53<3:34:47, 39.57it/s]global step 490000, trans_decision ep_re 3093.506943835302

{"global_step": 490000, "eval_re": [3140.8617997216356, 3069.7606086998408, 
3366.151622163615, 3344.2695610400397, 3031.0388644772065, 2644.680542761428, 
3313.334097813061, 2926.5516235797118, 3138.8047476972533, 2959.6159703992307], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499995/1000000 [4:48:23<3:06:49, 44.61it/s]global step 500000, trans_decision ep_re 2569.513460901989

{"global_step": 500000, "eval_re": [3348.9150447204756, 2340.9131512619556, 
2670.8350524972047, 2309.8831950597973, 1621.2737818517828, 4055.142498970665, 
2825.024829056441, 2097.6851362494344, 2321.9732053102775, 2103.488714041857], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509995/1000000 [4:53:53<3:03:10, 44.59it/s]global step 510000, trans_decision ep_re 2813.7343417716265

{"global_step": 510000, "eval_re": [2891.7299862066047, 4018.1718414341462, 
3364.7241040032595, 3483.9100050765655, 1933.620626785389, 2634.9304215707702, 
2596.8080839316235, 1535.5930613126466, 3478.298905693255, 2199.556381702], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519995/1000000 [4:59:13<2:59:43, 44.51it/s]global step 520000, trans_decision ep_re 1872.9222716339857

{"global_step": 520000, "eval_re": [2579.205505619358, 1574.2246855758972, 
1452.6973622109517, 1756.032637167945, 2711.8467931759624, 1537.3835666855682, 
1462.4956435494566, 1690.8368893874456, 1829.7359220216567, 2134.763710945617], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529995/1000000 [5:04:43<2:55:27, 44.64it/s]global step 530000, trans_decision ep_re 2423.9453448181357

{"global_step": 530000, "eval_re": [1722.5816080302532, 1887.3695950161782, 
3329.6740849604967, 1582.7239168178846, 3507.0126626840247, 3419.60098043332, 
1364.4291076606528, 2615.8184854996366, 2676.4118923880133, 2133.8311146909], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539998/1000000 [5:10:13<2:51:20, 44.75it/s]global step 540000, trans_decision ep_re 2381.061528224375

{"global_step": 540000, "eval_re": [2572.962528976362, 1863.7418888509271, 
4095.9920903488146, 1796.3776006916817, 1888.4097890401893, 1730.9306453491192, 
2389.1273953288796, 2720.344595814631, 1615.2378013232392, 3137.490946519906], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549995/1000000 [5:15:34<2:47:00, 44.91it/s]global step 550000, trans_decision ep_re 2270.109174221074

{"global_step": 550000, "eval_re": [1494.317022923802, 2548.781990104333, 
2085.0113360739, 2199.625593407903, 2364.022804526955, 2623.9269497801756, 
1631.272460281391, 3337.4086140272193, 2308.340557144739, 2108.384413940324], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559995/1000000 [5:21:04<2:42:47, 45.05it/s]global step 560000, trans_decision ep_re 2475.133561598577

{"global_step": 560000, "eval_re": [3454.068761435421, 2554.4236887888555, 
2266.217989819131, 1928.2525950159443, 1457.1838405742685, 1375.6562839928183, 
2948.423370746547, 3095.796014142606, 2774.52936819072, 2896.7837032794614], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569995/1000000 [5:26:24<2:39:20, 44.98it/s]global step 570000, trans_decision ep_re 2669.847643256676

{"global_step": 570000, "eval_re": [1889.2796907677248, 3224.741381562192, 
2555.3949200699117, 2303.006389693697, 2627.9912627056706, 2324.2824950788067, 
2629.772338335796, 3201.664657592326, 3173.7558029849515, 2768.5874937756835], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579995/1000000 [5:31:44<2:35:47, 44.93it/s]global step 580000, trans_decision ep_re 1830.2277858703487

{"global_step": 580000, "eval_re": [1888.2339156397697, 1510.7853387528005, 
2602.1008727690496, 2013.9203296616217, 2297.897289790694, 139.25815885546447, 
1837.494490349022, 2628.461825856175, 1896.9538127544918, 1487.1718242743998], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589995/1000000 [5:37:04<2:32:31, 44.80it/s]global step 590000, trans_decision ep_re 2352.563238729902

{"global_step": 590000, "eval_re": [1969.1823802047393, 3404.3324051309164, 
1711.2405680134884, 1725.9438558368563, 3300.4894482842706, 3493.4197460207984, 
1474.7055219685215, 1603.3465230476827, 2728.6673342941554, 2114.30460449759], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599995/1000000 [5:42:34<2:28:28, 44.90it/s]global step 600000, trans_decision ep_re 2539.7740042872574

{"global_step": 600000, "eval_re": [1742.1115635948586, 1428.020284853157, 
2042.8898440967962, 3271.2150951564554, 3425.8444038508505, 3451.3958763527808, 
2399.415305789307, 2306.4293683378637, 2251.034819302798, 3079.38348153771], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609995/1000000 [5:47:54<2:24:46, 44.90it/s]global step 610000, trans_decision ep_re 1978.37749977224

{"global_step": 610000, "eval_re": [1723.0022129214426, 1584.804103364321, 
2195.3936068775793, 1506.6906844641535, 1734.8721975198252, 1484.180386506049, 
1452.955644651295, 2282.0835821909163, 2632.436548916109, 3187.356030310712], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619995/1000000 [5:53:14<2:20:41, 45.02it/s]global step 620000, trans_decision ep_re 2294.949029681002

{"global_step": 620000, "eval_re": [1909.2079750205767, 1932.2659552451655, 
2729.212394487417, 3112.7107307408824, 2988.715969928092, 2664.2422502080058, 
2332.358337516958, 1554.792957454153, 1824.4382981585245, 1901.5454280502454], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629995/1000000 [5:58:34<2:17:25, 44.87it/s]global step 630000, trans_decision ep_re 2068.840983685726

{"global_step": 630000, "eval_re": [1349.0270838929512, 1405.1191155589595, 
1492.2871695206636, 1586.9154013093753, 3059.064609398957, 1604.653366758173, 
2053.0765469264793, 2326.870650721664, 2377.4800149480766, 3433.9158778219603], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639995/1000000 [6:03:54<2:13:24, 44.97it/s]global step 640000, trans_decision ep_re 2105.2749012742493

{"global_step": 640000, "eval_re": [1721.0643829340067, 2079.11879995684, 
2448.9632298414076, 3352.7977862591324, 1384.49988245572, 1708.8828435014834, 
1604.3386610227562, 3224.557116916018, 1503.086488832946, 2025.4398210221855], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649995/1000000 [6:09:14<2:10:09, 44.82it/s]global step 650000, trans_decision ep_re 1879.0276764932685

{"global_step": 650000, "eval_re": [1730.4249059467315, 2129.433120264699, 
1543.0818203687713, 1951.4865504567163, 3094.079820910618, 1741.067094561934, 
1750.8736795703237, 1603.4448090161538, 1623.4466832953426, 1622.9382805413952],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659995/1000000 [6:14:34<2:06:09, 44.92it/s]global step 660000, trans_decision ep_re 1746.990043514062

{"global_step": 660000, "eval_re": [2221.1281050584234, 1775.7171363305615, 
1739.8775609364066, 1743.4690709852077, 1746.2115059202388, 1599.6253763260477, 
1663.9771529747684, 1737.941108355652, 1603.0596864780705, 1638.8937317752464], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669995/1000000 [6:19:54<2:02:57, 44.73it/s]global step 670000, trans_decision ep_re 2673.2837555687306

{"global_step": 670000, "eval_re": [3205.2352574818383, 1764.7590499855178, 
2875.227742845823, 1989.450847227655, 2800.746851127078, 3315.6447966103087, 
2907.217389131596, 2855.917873417013, 2425.5326488281275, 2593.1050990323506], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679995/1000000 [6:25:14<1:58:55, 44.84it/s]global step 680000, trans_decision ep_re 2257.712523641752

{"global_step": 680000, "eval_re": [1382.7917768134707, 2782.593635996514, 
2766.4435518017044, 2317.7574244884327, 1966.486571542303, 1763.0308868369132, 
2836.723795998093, 1681.4991210156527, 2650.1225668741645, 2429.6759050502747], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689995/1000000 [6:30:35<1:54:49, 45.00it/s]global step 690000, trans_decision ep_re 1747.9597659712429

{"global_step": 690000, "eval_re": [1950.0525147404555, 1533.832884774143, 
1570.1268147906687, 1769.4597469028574, 1591.654774023669, 2453.879353601406, 
1003.8135568185268, 1631.6718911305866, 2330.8714094301367, 1644.2347134999784],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699995/1000000 [6:35:55<1:51:16, 44.94it/s]global step 700000, trans_decision ep_re 2214.566817980467

{"global_step": 700000, "eval_re": [2558.3937024064426, 2279.9051462825705, 
1698.7595032989339, 2393.0370791298615, 2383.2061105360876, 1237.9066361159335, 
1838.072210208872, 2105.235168638894, 2942.0849395613905, 2709.067683625684], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709995/1000000 [6:41:25<1:47:08, 45.12it/s]global step 710000, trans_decision ep_re 1817.94014722081

{"global_step": 710000, "eval_re": [1394.9968257165124, 3115.855497929741, 
1820.0621018090908, 1514.4627862147352, 1830.7095875860089, 1831.224487655541, 
1543.1456243340522, 1511.336957947878, 1491.0087893198408, 2126.5988136946994], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719995/1000000 [6:46:45<1:44:11, 44.79it/s]global step 720000, trans_decision ep_re 2149.8166826555603

{"global_step": 720000, "eval_re": [1423.2946135515779, 2082.333839369369, 
2117.2521848842252, 3371.5730244248543, 1581.6915202710315, 1994.8124410889407, 
1496.2405892549282, 3190.7144803986175, 2124.8255934560957, 2115.4285398559646],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729995/1000000 [6:52:15<1:40:00, 45.00it/s]global step 730000, trans_decision ep_re 2011.135654757461

{"global_step": 730000, "eval_re": [2129.449189987001, 2033.6300871565334, 
1740.258816366306, 1845.7359823802583, 1692.7654110519875, 1809.5348120309652, 
1570.9669909582965, 3369.4966974364343, 1437.6587284590046, 2481.859831747825], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739995/1000000 [6:57:36<1:36:35, 44.86it/s]global step 740000, trans_decision ep_re 2146.01417316404

{"global_step": 740000, "eval_re": [2872.3024964758742, 2462.71787308693, 
1627.7878014724165, 2403.2765436453496, 1368.2660121883998, 2294.0627029060897, 
1667.8032943999801, 2631.0368825516985, 1548.5548971624667, 2584.333227751193], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749995/1000000 [7:02:56<1:32:28, 45.06it/s]global step 750000, trans_decision ep_re 2018.2003858559171

{"global_step": 750000, "eval_re": [1743.4588602272013, 1797.9345578970176, 
1728.975575468892, 2192.3677590991824, 2999.6853586039038, 2468.808125573575, 
1829.400315671319, 2215.905588350675, 1487.4713306824954, 1717.9963869849087], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759995/1000000 [7:08:16<1:29:06, 44.89it/s]global step 760000, trans_decision ep_re 2117.114744483545

{"global_step": 760000, "eval_re": [1442.437450166161, 2322.8240925030263, 
1467.2540714715822, 2863.0562967963892, 3028.3273970065875, 1735.136034840553, 
1818.9553613169162, 3027.072491842198, 1780.8068894112705, 1685.2773594807709], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769995/1000000 [7:13:46<1:25:07, 45.03it/s]global step 770000, trans_decision ep_re 2001.305050840337

{"global_step": 770000, "eval_re": [2021.6769565106574, 1778.17164149322, 
1968.4436929185108, 3033.4266077153807, 1637.5506711133212, 1529.515131166387, 
2409.941826580704, 1897.4299375747053, 1681.5822341832863, 2055.3118091471965], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779995/1000000 [7:19:06<1:21:26, 45.03it/s]global step 780000, trans_decision ep_re 1527.244931133493

{"global_step": 780000, "eval_re": [2367.6302774898973, 1295.6547472335396, 
1424.8593245633642, 1477.2693356364628, 1445.7477839701914, 1623.3659495407942, 
1352.3774934101157, 1260.8106638943784, 1405.1774859836762, 1619.5562496125112],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789995/1000000 [7:24:26<1:18:07, 44.80it/s]global step 790000, trans_decision ep_re 1937.5753266868778

{"global_step": 790000, "eval_re": [1379.9673863751223, 2669.633854287118, 
1406.3873366261453, 2101.298993800003, 2294.345013596664, 1465.8855099498421, 
1694.0754841754758, 1800.458473723526, 2587.5064803960026, 1976.194733938877], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799995/1000000 [7:29:46<1:14:13, 44.91it/s]global step 800000, trans_decision ep_re -492.017729326858

{"global_step": 800000, "eval_re": [-520.3317117774783, -461.43271885512445, 
-492.87400296975716, -478.12570265087567, -515.9014926994516, 
-516.3031683792061, -488.0452790627693, -453.5073016605423, -497.63257505958825,
-496.0233401537868], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

 81%|████████  | 809995/1000000 [7:35:06<1:10:34, 44.87it/s]global step 810000, trans_decision ep_re 2228.542761528106

{"global_step": 810000, "eval_re": [2583.0225015764363, 1237.28970711565, 
2391.451742476052, 2636.1000390657127, 2285.6631442747403, 2056.7008547026144, 
1790.027726748869, 1591.9218937955743, 2829.466937740721, 2883.7830677846896], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819995/1000000 [7:40:26<1:07:46, 44.26it/s]global step 820000, trans_decision ep_re 1897.8257349287935

{"global_step": 820000, "eval_re": [2100.4550162126943, 1561.9277927923138, 
2824.1370375480446, 1407.973200911334, 1520.6731927286162, 1726.1222416173794, 
1662.784441144274, 2575.0765800874105, 2199.985136617964, 1399.1227096279051], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829995/1000000 [7:45:56<1:03:34, 44.57it/s]global step 830000, trans_decision ep_re 1604.7180396477856

{"global_step": 830000, "eval_re": [96.35803462257503, 1610.911008658342, 
1239.8609483073699, 3133.98272865728, 1483.0847740054912, 1838.8316350065961, 
2840.4731918464554, 1524.6232621711904, 390.5996828654709, 1888.455130337087], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839995/1000000 [7:51:16<1:00:01, 44.42it/s]global step 840000, trans_decision ep_re 2108.7856863334514

{"global_step": 840000, "eval_re": [2298.393938158246, 2666.377103190701, 
2252.605233556832, 1662.6440720311175, 1910.331223744651, 1742.9089386953144, 
2722.712577906762, 2473.839059870936, 1638.473853323037, 1719.5708628569157], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849995/1000000 [7:56:46<56:06, 44.56it/s]global step 850000, trans_decision ep_re 1981.4721036380136

{"global_step": 850000, "eval_re": [1555.0119140739316, 2442.75298910438, 
1755.336129651239, 1494.1546507861005, 1766.0140209655915, 2468.0278473400763, 
2628.9590818781903, 2004.1666747548886, 1473.2787307336885, 2227.0189970920505],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859995/1000000 [8:02:06<1:03:06, 36.98it/s]global step 860000, trans_decision ep_re 2395.883292769893

{"global_step": 860000, "eval_re": [1822.7791383697934, 2317.29357732807, 
2297.650258020041, 1774.6732585545788, 3184.067860013643, 2540.743545653221, 
1805.9451084362236, 2748.857037123413, 3157.006185968749, 2309.816958231197], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869995/1000000 [8:07:36<48:38, 44.55it/s]global step 870000, trans_decision ep_re 1773.8495222548113

{"global_step": 870000, "eval_re": [1683.8992847235652, 2270.13325913712, 
1964.78401756639, 1758.8889371952387, 1934.9982122461502, 1556.2622106911056, 
1852.6286521833676, 1688.8263858527498, 1752.65728369031, 1275.4169792621187], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879995/1000000 [8:12:56<44:51, 44.59it/s]global step 880000, trans_decision ep_re 1809.9630623824178

{"global_step": 880000, "eval_re": [1434.8691480535704, 2671.435196043913, 
2569.9070146268095, 1817.9882957895522, 2065.2287363641462, -46.38252152404305, 
1625.535592421893, 1611.5382258560574, 2119.497468293824, 2230.0134678984527], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889995/1000000 [8:18:26<41:04, 44.63it/s]global step 890000, trans_decision ep_re 2222.184134217196

{"global_step": 890000, "eval_re": [2631.035248635417, 1861.732573955355, 
2685.68547800021, 1824.4907670274313, 2730.30440561238, 1893.8154139785988, 
2302.0139198239467, 2381.1579183494273, 1535.3804169364905, 2376.225199852704], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899995/1000000 [8:23:46<37:25, 44.54it/s]global step 900000, trans_decision ep_re 2088.4851262603092

{"global_step": 900000, "eval_re": [2139.956419267621, 1428.8817716148083, 
2743.267099825432, 2500.5932050593447, 1354.600193053922, 2837.720301828154, 
1646.0414209201747, 2350.184002334781, 2249.8575600633403, 1633.749288635513], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909995/1000000 [8:29:07<33:41, 44.52it/s]global step 910000, trans_decision ep_re 2493.4054428905047

{"global_step": 910000, "eval_re": [2592.1913495660838, 2607.8124405904946, 
2788.738293972256, 2621.832150506092, 2577.8261161940004, 2673.4721090361486, 
2437.6461091332553, 1978.4533165431767, 2135.0059813267994, 2521.076562036746], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919995/1000000 [8:34:37<29:48, 44.74it/s]global step 920000, trans_decision ep_re 1967.8504579814828

{"global_step": 920000, "eval_re": [1396.2129492153138, 1947.6303804898114, 
2217.324986902451, 1330.7307285777058, 1848.6609308434417, 2122.9056825525718, 
1903.48392488745, 1967.8452729630728, 2300.3577967392916, 2643.3519266437174], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929995/1000000 [8:39:57<26:11, 44.54it/s]global step 930000, trans_decision ep_re 2055.8932806994058

{"global_step": 930000, "eval_re": [2389.395876326526, 2204.368068143851, 
1892.299734318315, 1557.4631889023804, 1404.6896975015395, 1829.7133863317638, 
2686.8868089094094, 2406.3109749035857, 2703.324520945331, 1484.4805507113547], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939995/1000000 [8:45:27<22:21, 44.73it/s]global step 940000, trans_decision ep_re 2568.754415000775

{"global_step": 940000, "eval_re": [2731.807469967848, 2876.3734128264264, 
2908.0963762750207, 2287.844975773477, 1460.7975800200452, 2724.139251740552, 
2950.371572779318, 2329.5036977026984, 2651.4133448037983, 2767.196468118568], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949995/1000000 [8:50:47<18:41, 44.60it/s]global step 950000, trans_decision ep_re 1929.0408613509073

{"global_step": 950000, "eval_re": [2643.1672800490214, 2620.088649137972, 
1751.2325475308296, -267.9736512571966, 2142.9426937450025, 2156.657838200376, 
2676.7691466122847, 1652.6091414237014, 1915.6339730242198, 1999.2809950428632],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959995/1000000 [8:56:07<14:55, 44.68it/s]global step 960000, trans_decision ep_re 2092.272238717821

{"global_step": 960000, "eval_re": [1450.4061949974655, 2523.760289933773, 
2327.2983501017084, 2215.042856199195, 2329.525178981241, 2123.0545149619056, 
2508.5931040487367, 1548.5166378800784, 1681.3693008642765, 2215.1559592098274],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969996/1000000 [9:02:07<13:50, 36.14it/s]global step 970000, trans_decision ep_re 2391.2985497528452

{"global_step": 970000, "eval_re": [1595.2212807707149, 2482.158091313083, 
2955.8771812972623, 1632.0233362544693, 2689.457700062923, 2605.9001946074654, 
2566.1713125877422, 2894.880772549233, 1632.9576768627512, 2858.337951222809], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979998/1000000 [9:08:17<07:23, 45.13it/s]global step 980000, trans_decision ep_re 1943.9246232847167

{"global_step": 980000, "eval_re": [1568.5459703034746, 1706.1929627532243, 
1596.0110568042783, 1784.2815009170824, 1428.7483519290379, 2573.6327311861824, 
2835.2781795684823, 1715.8229232343872, 1853.5533504637158, 2377.1792056873014],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989997/1000000 [9:14:07<04:00, 41.65it/s]global step 990000, trans_decision ep_re 2133.492383828178

{"global_step": 990000, "eval_re": [1774.1073568013949, 2427.940545215875, 
2544.007170881328, 1587.7053288829004, 2094.960626825916, 2871.9201149468686, 
1730.3721533796665, 1548.0132800169613, 2006.8637498170517, 2749.0335115138164],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999996/1000000 [9:20:07<00:00, 35.67it/s]global step 1000000, trans_decision ep_re 1924.7830043645852

{"global_step": 1000000, "eval_re": [1850.3127386242486, 2092.283142671672, 
1670.5643333960934, 2555.637084918777, 1501.8331062822267, 2429.873752655141, 
1807.9387493968966, 1871.979884154176, 1769.7807810169793, 1697.6264705296417], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [9:20:32<00:00, 29.73it/s]
