
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)'
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9999/1000000 [03:00<6:24:13, 42.94it/s]global step 10000, trans_decision ep_re 351.6901464106652

{"global_step": 10000, "eval_re": [251.2248443842283, 314.7276687004524, 
450.15537920229076, 253.07881727426232, 484.8741077828388, 252.98543140062688, 
382.6955006462388, 277.89963674590643, 521.83574002592, 327.4243379438871], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19998/1000000 [08:40<6:18:25, 43.16it/s]global step 20000, trans_decision ep_re 793.2346795575279

{"global_step": 20000, "eval_re": [694.2252974007514, 632.3060309095953, 
665.0924430737841, 884.0505009272766, 849.1120823624535, 886.3956304851812, 
636.7969189979929, 891.6101735260044, 881.4806715080475, 911.2770463841914], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 820, 1000, 1000, 1000]}

  3%|▎         | 29995/1000000 [14:20<6:19:08, 42.64it/s]global step 30000, trans_decision ep_re 550.9313533373113

{"global_step": 30000, "eval_re": [716.742091186343, 80.91085967966448, 
741.634536899825, 648.2464245288006, 254.72417568562713, 734.3542435624107, 
706.9332833692162, 735.2940547924557, 91.23357970766662, 799.2402839611028], 
"eval_len": [1000, 80, 1000, 1000, 295, 1000, 694, 1000, 93, 1000]}

  4%|▍         | 39995/1000000 [20:00<6:11:14, 43.10it/s]global step 40000, trans_decision ep_re 664.2027990819099

{"global_step": 40000, "eval_re": [722.3197056364028, 704.4538946489073, 
703.491771281559, 751.7319450965996, 752.5581295138718, 713.2656401165642, 
770.8743154936394, 679.5109545339152, 721.416794664624, 122.40483983301577], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 165]}

  5%|▍         | 49997/1000000 [25:30<6:11:57, 42.57it/s]global step 50000, trans_decision ep_re 616.8947974381399

{"global_step": 50000, "eval_re": [1168.730248447185, 169.7625951908893, 
203.06567775826983, 1003.8033152078964, 197.7813305961441, 475.07344055829464, 
1085.2565567309944, 649.772500129754, 877.9743347140214, 337.72797504795057], 
"eval_len": [1000, 105, 164, 1000, 123, 326, 1000, 492, 1000, 217]}

  6%|▌         | 59995/1000000 [31:00<6:04:16, 43.01it/s]global step 60000, trans_decision ep_re 546.874396011141

{"global_step": 60000, "eval_re": [962.8554094910338, 375.50724804170994, 
587.8758848436886, 432.4931497305392, 63.61679462234072, 956.0996396319993, 
701.7816216233489, 181.10579799980604, 50.28238938866211, 1157.1260247382813], 
"eval_len": [846, 309, 1000, 375, 68, 718, 1000, 136, 43, 828]}

  7%|▋         | 69998/1000000 [36:30<5:55:30, 43.60it/s]global step 70000, trans_decision ep_re 971.6168141999273

{"global_step": 70000, "eval_re": [1766.6860725466552, 803.5513039731686, 
1106.6567983163159, 575.3625305357473, 520.4821801241421, 1527.414971207506, 
1792.736845701939, 840.6318693071063, 434.18230656702434, 348.46326371966893], 
"eval_len": [1000, 549, 1000, 304, 296, 1000, 1000, 478, 247, 203]}

  8%|▊         | 79995/1000000 [42:00<5:56:48, 42.97it/s]global step 80000, trans_decision ep_re 1095.8342154966995

{"global_step": 80000, "eval_re": [1240.6868085114509, 760.7881431868298, 
1686.7989390113196, 2045.4599050509366, 59.04890688976736, 39.862383328941235, 
323.1508705514257, 2052.2268378673266, 850.6080519924241, 1899.711308576575], 
"eval_len": [695, 399, 841, 1000, 74, 32, 173, 1000, 547, 1000]}

  9%|▉         | 89998/1000000 [47:41<5:46:40, 43.75it/s]global step 90000, trans_decision ep_re 1352.1796144831908

{"global_step": 90000, "eval_re": [1785.5268969194399, 1214.3428874484948, 
1454.8087769537956, 928.0335548374024, 1853.8706651799298, 1890.9129169410596, 
1922.780794981282, 555.4987690078137, 1320.4743461713567, 595.5465363913328], 
"eval_len": [1000, 1000, 787, 515, 1000, 1000, 1000, 317, 1000, 269]}

 10%|▉         | 99995/1000000 [53:11<5:47:43, 43.14it/s]global step 100000, trans_decision ep_re 1808.993272760293

{"global_step": 100000, "eval_re": [1097.9676539140703, 1787.6119855456748, 
1801.9281617273589, 1868.6400681566568, 1974.7517409264854, 1688.9885190447883, 
1995.182677062082, 1861.1373994153003, 2022.0394277985931, 1991.6850940119216], 
"eval_len": [628, 1000, 1000, 1000, 1000, 956, 1000, 1000, 1000, 1000]}

 11%|█         | 109995/1000000 [58:51<5:43:02, 43.24it/s]global step 110000, trans_decision ep_re 662.1045719821916

{"global_step": 110000, "eval_re": [1360.8089841504332, 170.0235398218235, 
200.8547526890181, 846.6234247328517, 102.00493812150545, 156.3268576654076, 
427.7474414872205, 676.1327822418606, 1443.50869901433, 1237.0142998974654], 
"eval_len": [830, 97, 124, 409, 60, 79, 301, 367, 1000, 1000]}

 12%|█▏        | 119999/1000000 [1:04:11<5:36:22, 43.60it/s]global step 120000, trans_decision ep_re 1911.255697433182

{"global_step": 120000, "eval_re": [2560.9934926155056, 1655.3663463761409, 
2342.7711717438815, 2530.760759351083, 2398.4178086435077, 2194.7972197805825, 
711.0272049704281, 2419.0240628743463, 1879.6310095596957, 419.767898416646], 
"eval_len": [1000, 720, 1000, 1000, 1000, 1000, 280, 1000, 790, 220]}

 13%|█▎        | 129995/1000000 [1:09:51<5:30:00, 43.94it/s]global step 130000, trans_decision ep_re 924.2376939103253

{"global_step": 130000, "eval_re": [1407.5364506399292, 888.5913298542488, 
2327.2608769388817, 1791.3466820688693, 459.1907716254783, 583.115717207417, 
291.20536294501227, 205.94244089871486, 1026.7599660713088, 261.42734085339225],
"eval_len": [1000, 347, 1000, 704, 239, 247, 155, 117, 384, 112]}

 14%|█▍        | 139995/1000000 [1:15:12<5:30:08, 43.42it/s]global step 140000, trans_decision ep_re 1220.2931965958599

{"global_step": 140000, "eval_re": [2008.2183670743893, 1509.1485763036235, 
1621.2965264522193, 344.12569550958057, 1447.413872330381, 2309.0538060543167, 
114.8665393525275, 1319.6748504605157, 1101.1966501672925, 427.93708225375065], 
"eval_len": [1000, 1000, 669, 190, 1000, 1000, 105, 1000, 518, 197]}

 15%|█▍        | 149995/1000000 [1:20:32<5:23:44, 43.76it/s]global step 150000, trans_decision ep_re 1696.0387351241436

{"global_step": 150000, "eval_re": [867.007406707354, 2124.8082905927304, 
1392.0272867911162, 2279.3397132121345, 1017.2918520827632, 2124.3001375829604, 
886.3941489582478, 2015.2879880555909, 2037.8925105865837, 2216.038016671954], 
"eval_len": [444, 1000, 1000, 1000, 445, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159995/1000000 [1:26:12<5:23:09, 43.32it/s]global step 160000, trans_decision ep_re 1366.1725918434522

{"global_step": 160000, "eval_re": [689.9153464484272, 1102.2118608081414, 
2603.617033861201, 2240.92844974568, 2225.001797057478, 1229.71293993914, 
914.2374202665384, 502.2040531851801, 808.5860385927841, 1345.3109785299505], 
"eval_len": [300, 1000, 1000, 1000, 1000, 1000, 423, 208, 1000, 596]}

 17%|█▋        | 169995/1000000 [1:31:42<5:18:16, 43.46it/s]global step 170000, trans_decision ep_re 1460.2863383382587

{"global_step": 170000, "eval_re": [686.1993610545608, 1102.8433946250525, 
1593.4262544029762, 2036.4957959999724, 734.050408517103, 1566.094945365843, 
1431.0608153893315, 731.8839407401761, 2221.3674661941836, 2499.4410010933875], 
"eval_len": [304, 521, 670, 1000, 332, 656, 638, 325, 1000, 1000]}

 18%|█▊        | 179995/1000000 [1:37:02<5:14:31, 43.45it/s]global step 180000, trans_decision ep_re 1837.6237121627892

{"global_step": 180000, "eval_re": [2178.519891883653, 1551.2895755488344, 
2468.2908680946107, 80.71993854320836, 370.1764696262965, 2402.4647492843965, 
2386.6534984554305, 2315.6341470361385, 2266.3659613460636, 2356.1220218092585],
"eval_len": [1000, 1000, 1000, 45, 190, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189995/1000000 [1:42:32<5:08:51, 43.71it/s]global step 190000, trans_decision ep_re 1987.531585657241

{"global_step": 190000, "eval_re": [2132.3717004516507, 2386.82597610787, 
2312.651325099467, 2425.701963581434, 2294.3368029497005, 2268.6635507327105, 
867.7573528044583, 396.7204551603317, 2437.002418910033, 2353.2843107747585], 
"eval_len": [892, 1000, 1000, 1000, 1000, 1000, 402, 177, 1000, 1000]}

 20%|█▉        | 199995/1000000 [1:48:02<5:07:51, 43.31it/s]global step 200000, trans_decision ep_re 1654.5064293047858

{"global_step": 200000, "eval_re": [545.6573403032139, 2456.7074015623625, 
2827.0513798921415, 1040.7542804591487, 2411.991551153899, 2569.895172175661, 
1026.3764263454416, 795.3593537704211, 2420.4089647867213, 450.8624225988468], 
"eval_len": [248, 1000, 1000, 432, 1000, 1000, 1000, 294, 1000, 173]}

 21%|██        | 209999/1000000 [1:53:32<5:04:09, 43.29it/s]global step 210000, trans_decision ep_re 1954.3995760158086

{"global_step": 210000, "eval_re": [959.6169659313864, 2582.693074682312, 
2412.8983594744504, 598.6143384483105, 2284.5388629821664, 2569.653022776152, 
2316.261615713163, 2314.494599206194, 2531.038578310452, 974.1863426335001], 
"eval_len": [373, 1000, 1000, 220, 947, 1000, 1000, 1000, 1000, 409]}

 22%|██▏       | 219995/1000000 [1:59:02<4:59:07, 43.46it/s]global step 220000, trans_decision ep_re 2408.85952921819

{"global_step": 220000, "eval_re": [2260.953649181272, 2416.6187704592135, 
2520.228291810245, 2609.9272717952294, 2375.366043790481, 2741.140077925543, 
2366.963289800009, 2592.324072527919, 1763.2329078551224, 2441.840917036864], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 713, 1000]}

 23%|██▎       | 229995/1000000 [2:04:42<4:53:40, 43.70it/s]global step 230000, trans_decision ep_re 1947.4652301309773

{"global_step": 230000, "eval_re": [2463.9593440477047, 2385.5165636446936, 
2555.332884621982, 434.38196896888087, 2504.785645882213, 1447.1991575083441, 
2337.442184194586, 2450.2139616350546, 381.7589328696458, 2514.061657936668], 
"eval_len": [1000, 1000, 1000, 170, 1000, 622, 1000, 1000, 188, 1000]}

 24%|██▍       | 239995/1000000 [2:10:12<4:52:16, 43.34it/s]global step 240000, trans_decision ep_re 1883.6136468004922

{"global_step": 240000, "eval_re": [2568.334083034454, 2531.491024111023, 
2724.3046976556207, 2501.769005690095, 339.46022894246545, 946.2694885795634, 
569.3070038176263, 1911.1360286700785, 2340.1599896436264, 2403.9049178603705], 
"eval_len": [1000, 1000, 1000, 1000, 155, 342, 229, 772, 1000, 1000]}

 25%|██▍       | 249995/1000000 [2:15:42<4:47:15, 43.52it/s]global step 250000, trans_decision ep_re 1646.089733815014

{"global_step": 250000, "eval_re": [2437.594763719561, 1286.0590134492757, 
2690.4952479131675, 257.08765476782037, 695.1193760172922, 2512.24863836225, 
171.58459253044018, 2291.4342484110944, 1685.9266949659739, 2433.3471080132645],
"eval_len": [1000, 497, 1000, 123, 284, 1000, 89, 1000, 1000, 1000]}

 26%|██▌       | 259995/1000000 [2:21:12<4:47:15, 42.93it/s]global step 260000, trans_decision ep_re 1691.5241566150296

{"global_step": 260000, "eval_re": [130.66298506110417, 2646.4834118044987, 
2758.5857718268, 2689.2582274570686, 357.4123452523793, 2470.3204800924723, 
888.1507401345415, 806.9758187552108, 1340.4049460363324, 2826.9868397298883], 
"eval_len": [77, 1000, 1000, 1000, 228, 1000, 409, 334, 594, 1000]}

 27%|██▋       | 269995/1000000 [2:26:33<4:39:06, 43.59it/s]global step 270000, trans_decision ep_re 2234.5758933890625

{"global_step": 270000, "eval_re": [2171.05100041239, 2425.2110864948563, 
2082.16568847277, 2355.994165446314, 2171.058109304575, 2106.0798819585025, 
2194.038400124696, 2437.4416153269713, 2095.4164023858257, 2307.302583963725], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279995/1000000 [2:32:13<4:36:23, 43.42it/s]global step 280000, trans_decision ep_re 1747.7889706725032

{"global_step": 280000, "eval_re": [2489.417110051914, 1614.372067683528, 
2356.8156201505853, 240.777871256443, 1164.0128884753876, 2579.4425929800072, 
1824.4237955040098, 473.49489094004247, 2320.358745441043, 2414.774124242071], 
"eval_len": [1000, 1000, 1000, 108, 483, 1000, 772, 229, 1000, 1000]}

 29%|██▉       | 289995/1000000 [2:37:43<4:31:30, 43.58it/s]global step 290000, trans_decision ep_re 2159.585748491266

{"global_step": 290000, "eval_re": [2412.7387192662286, 2188.119438620327, 
2560.417943032138, 2398.392414257642, 2391.1707347299516, 2645.3667472966645, 
2171.7059188898074, 2341.7520789276446, 1950.8127703865896, 535.3807195056653], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 771, 231]}

 30%|██▉       | 299999/1000000 [2:43:13<4:30:50, 43.08it/s]global step 300000, trans_decision ep_re 1714.761322342652

{"global_step": 300000, "eval_re": [2478.15771653549, 974.8689242888049, 
1391.9784608699376, 137.5855428835372, 2598.2154654816686, 2517.2573537641506, 
2326.8177992032915, 2481.498435112484, 1172.369533230363, 1068.863992056794], 
"eval_len": [1000, 408, 557, 88, 1000, 1000, 1000, 1000, 515, 523]}

 31%|███       | 309995/1000000 [2:48:43<4:22:33, 43.80it/s]global step 310000, trans_decision ep_re 2088.3691285085856

{"global_step": 310000, "eval_re": [2208.8319018368347, 2274.6882621396708, 
2214.4515505866084, 2428.061237634645, 2251.382760708175, 2311.8754022306057, 
861.8208724017778, 1669.2960966763014, 2237.014269116902, 2426.2689317543354], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 393, 1000, 1000, 1000]}

 32%|███▏      | 319995/1000000 [2:54:23<4:20:12, 43.55it/s]global step 320000, trans_decision ep_re 1738.7284331497947

{"global_step": 320000, "eval_re": [2499.9774596135417, 2437.890899900756, 
2459.3969517188525, 2128.030337509117, 2415.6127521148005, 332.87222884774786, 
1176.715077345406, 1245.0294539088432, 300.50418516155094, 2391.254985377331], 
"eval_len": [1000, 1000, 1000, 871, 1000, 130, 478, 589, 146, 1000]}

 33%|███▎      | 329995/1000000 [2:59:43<4:15:25, 43.72it/s]global step 330000, trans_decision ep_re 2186.741630710726

{"global_step": 330000, "eval_re": [739.9366843732992, 2014.7937537084676, 
2682.6805083427544, 2652.492769271518, 2640.076550707876, 2592.034358692538, 
1345.7794021776997, 2523.6418247864763, 2123.18567474996, 2552.7947802966705], 
"eval_len": [316, 710, 1000, 1000, 1000, 1000, 467, 1000, 1000, 1000]}

 34%|███▍      | 339995/1000000 [3:05:13<4:13:38, 43.37it/s]global step 340000, trans_decision ep_re 1528.7886067275056

{"global_step": 340000, "eval_re": [177.49070842098703, 2113.1443991980937, 
2481.5381306696395, 2475.770823289234, 1810.607722831034, 1243.0353674105506, 
329.93377191733833, 968.2614475999704, 961.479489809526, 2726.6242061286807], 
"eval_len": [81, 1000, 1000, 1000, 674, 454, 131, 346, 352, 1000]}

 35%|███▍      | 349995/1000000 [3:10:43<4:08:55, 43.52it/s]global step 350000, trans_decision ep_re 2138.7766893962935

{"global_step": 350000, "eval_re": [1614.6479728615373, 2678.675248328217, 
2541.0429132914664, 2460.655406630479, 2597.299825001915, 2385.052822353393, 
1146.5981744783796, 2570.4217993589423, 966.6323927257429, 2426.740338932864], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 393, 1000]}

 36%|███▌      | 359995/1000000 [3:16:13<4:05:29, 43.45it/s]global step 360000, trans_decision ep_re 2006.6972553254823

{"global_step": 360000, "eval_re": [950.9966634615199, 2561.121089909871, 
2511.257831181518, 1611.3919539178462, 2452.861265765587, 2212.0413017517403, 
681.0037627840461, 2616.8574668223873, 2565.350621627432, 1904.090596032875], 
"eval_len": [368, 1000, 1000, 521, 1000, 1000, 284, 1000, 1000, 1000]}

 37%|███▋      | 369995/1000000 [3:21:43<3:59:48, 43.79it/s]global step 370000, trans_decision ep_re 1628.7482951431466

{"global_step": 370000, "eval_re": [2477.3773270614165, 1087.5515689144368, 
2449.3436354778482, 2486.3871831749984, 729.576589471059, 2547.494811944182, 
1178.5962391474068, 629.0677377228204, 262.95881737237573, 2439.129041144924], 
"eval_len": [1000, 402, 940, 1000, 302, 1000, 477, 262, 139, 1000]}

 38%|███▊      | 379995/1000000 [3:27:03<3:57:15, 43.55it/s]global step 380000, trans_decision ep_re 1421.270958538653

{"global_step": 380000, "eval_re": [372.9477730030546, 2492.3925058445443, 
215.65852965600143, 622.5300079100258, 1341.7992068328697, 2380.5829584352755, 
863.7779666095306, 2643.4900956560396, 2669.9629264889063, 609.5676149502832], 
"eval_len": [179, 1000, 112, 260, 512, 1000, 376, 1000, 1000, 225]}

 39%|███▉      | 389995/1000000 [3:32:23<3:51:46, 43.86it/s]global step 390000, trans_decision ep_re 1329.087697471499

{"global_step": 390000, "eval_re": [767.1951569463376, 780.3886788296211, 
2574.334150494975, 2389.3652303646522, 281.9462332395001, 2441.081430039623, 
207.3521276201277, 578.6392089948182, 2548.3159184686956, 722.2588397166425], 
"eval_len": [1000, 315, 1000, 1000, 109, 1000, 102, 236, 1000, 266]}

 40%|███▉      | 399995/1000000 [3:37:53<3:48:47, 43.71it/s]global step 400000, trans_decision ep_re 2528.674663324492

{"global_step": 400000, "eval_re": [2779.2877328132317, 2586.541341710725, 
2545.230844982021, 2290.535549548175, 2703.4461770948883, 2297.3685320858413, 
2520.410420526192, 2552.4913530021277, 2518.086827732929, 2493.3478537487917], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409995/1000000 [3:43:23<3:45:02, 43.69it/s]global step 410000, trans_decision ep_re 1450.8055442814186

{"global_step": 410000, "eval_re": [828.98136831663, 1958.4541472703363, 
92.86424175001109, 2231.5540533797725, 294.4084995181564, 2371.8435798868964, 
2336.6257171601555, 2356.9852543164566, 1841.5883089846968, 194.75027223107426],
"eval_len": [338, 870, 54, 1000, 140, 978, 1000, 1000, 1000, 135]}

 42%|████▏     | 419995/1000000 [3:48:53<3:43:28, 43.26it/s]global step 420000, trans_decision ep_re 1707.1011453489705

{"global_step": 420000, "eval_re": [2377.610668944812, 586.7849051335616, 
2116.4722746192347, 1818.0837400308108, 1008.8594379239231, 926.363998224662, 
2462.153673516726, 2546.7799953547847, 433.95364974695156, 2793.949109994236], 
"eval_len": [1000, 1000, 1000, 738, 454, 375, 1000, 1000, 189, 1000]}

 43%|████▎     | 429995/1000000 [3:54:23<3:37:20, 43.71it/s]global step 430000, trans_decision ep_re 1730.3749844695365

{"global_step": 430000, "eval_re": [262.9306547800743, 2354.310911835723, 
2424.223303622345, 2490.0519028055705, 2402.7557160818524, 1516.7597156438903, 
2721.963686257691, 2650.3180246291345, 203.43638159519958, 276.99954744388305], 
"eval_len": [121, 886, 1000, 1000, 1000, 601, 1000, 1000, 101, 127]}

 44%|████▍     | 439997/1000000 [3:59:43<3:35:11, 43.37it/s]global step 440000, trans_decision ep_re 1978.6127335651404

{"global_step": 440000, "eval_re": [2258.6960238551114, 279.11848625353286, 
2336.053032047266, 2715.586641771312, 2612.7921066625413, 2581.2594221692525, 
341.6819195800955, 2394.492793690207, 1788.3465276826287, 2478.100381939456], 
"eval_len": [1000, 124, 1000, 1000, 1000, 1000, 154, 1000, 1000, 1000]}

 45%|████▍     | 449995/1000000 [4:05:13<3:30:33, 43.53it/s]global step 450000, trans_decision ep_re 1559.3552947420226

{"global_step": 450000, "eval_re": [2556.6349771276364, 2608.615582856015, 
1876.9072071975884, 1828.0577823301073, 2696.9644395966884, 1378.0562991473855, 
536.8707586421427, 194.52682700906314, 723.9379712575983, 1192.9811022560018], 
"eval_len": [1000, 1000, 728, 659, 1000, 598, 1000, 99, 331, 492]}

 46%|████▌     | 459995/1000000 [4:10:43<3:24:31, 44.00it/s]global step 460000, trans_decision ep_re 1604.2584663421562

{"global_step": 460000, "eval_re": [1765.7580630997068, 2664.062159117891, 
868.2671851725783, 1850.734637906512, 1671.7929665590534, 344.77395366982523, 
2595.2271409612686, 1428.2289699397434, 803.8903283343781, 2049.849258660604], 
"eval_len": [589, 1000, 369, 709, 598, 235, 1000, 572, 323, 812]}

 47%|████▋     | 469995/1000000 [4:16:13<3:23:11, 43.47it/s]global step 470000, trans_decision ep_re 1923.8817959836288

{"global_step": 470000, "eval_re": [2524.6188506232907, 164.11561451297416, 
2497.406894530124, 2102.6314560834935, 1050.1747841206977, 2384.940852956473, 
1373.910819917672, 2347.9780356588612, 2358.3476087432614, 2434.6930426894405], 
"eval_len": [1000, 71, 1000, 906, 467, 1000, 650, 968, 1000, 1000]}

 48%|████▊     | 479995/1000000 [4:21:43<3:17:54, 43.79it/s]global step 480000, trans_decision ep_re 1971.317712069535

{"global_step": 480000, "eval_re": [2465.0842039371137, 1214.6234402146715, 
2498.310436763618, 2453.1279094342717, 2476.1724022764593, 29.477560857200615, 
1290.6129848457772, 2517.9254010538625, 2324.8746396213596, 2442.968141691015], 
"eval_len": [1000, 577, 1000, 1000, 1000, 33, 577, 1000, 1000, 1000]}

 49%|████▉     | 489995/1000000 [4:27:14<3:14:44, 43.65it/s]global step 490000, trans_decision ep_re 1924.1221667537568

{"global_step": 490000, "eval_re": [2461.0720290707527, 594.4807899096976, 
2444.216248207986, 2458.350633120198, 2530.8203127619095, 696.3897897701537, 
782.1839165420953, 2416.3962563181153, 2372.5375689660063, 2484.7741228706545], 
"eval_len": [1000, 271, 1000, 1000, 1000, 374, 421, 1000, 996, 1000]}

 50%|████▉     | 499995/1000000 [4:32:44<3:11:06, 43.61it/s]global step 500000, trans_decision ep_re 2198.541401765382

{"global_step": 500000, "eval_re": [2329.263159573485, 2558.955827134624, 
2596.3814417316694, 2533.638010354423, 496.36499771876333, 2312.2389495420953, 
2144.955892890893, 2199.0367271403675, 2445.845226911616, 2368.7337846558858], 
"eval_len": [1000, 1000, 1000, 1000, 216, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509995/1000000 [4:38:14<3:09:33, 43.08it/s]global step 510000, trans_decision ep_re 1289.0713733055545

{"global_step": 510000, "eval_re": [314.5529565397536, 485.3399415580177, 
1875.3889326813396, 674.907498870485, 2428.0214348202444, 848.2328808439473, 
1914.6124621847166, 2377.333872951996, 1777.3919676492676, 194.93178495577763], 
"eval_len": [139, 193, 778, 1000, 1000, 373, 814, 1000, 685, 95]}

 52%|█████▏    | 519995/1000000 [4:43:44<3:05:05, 43.22it/s]global step 520000, trans_decision ep_re 1845.2937376556365

{"global_step": 520000, "eval_re": [674.6988854244582, 2430.5000159751103, 
2372.4934316051017, 2406.4849324941997, 2353.9328015018054, 2580.218903873318, 
2490.394551650685, 947.920041128952, 2178.7902129702225, 17.503599932512895], 
"eval_len": [323, 1000, 1000, 1000, 1000, 1000, 1000, 397, 1000, 23]}

 53%|█████▎    | 529995/1000000 [4:49:14<2:59:27, 43.65it/s]global step 530000, trans_decision ep_re 1902.9469335013516

{"global_step": 530000, "eval_re": [2632.565173381205, 868.6199404792283, 
2414.0619199968182, 2055.201776683112, 2352.867581256583, 193.34977595324042, 
2425.538220831282, 1160.5017248833196, 2488.8179389885945, 2437.9452825601343], 
"eval_len": [1000, 1000, 967, 894, 1000, 89, 1000, 500, 1000, 1000]}

 54%|█████▍    | 539995/1000000 [4:54:44<2:57:28, 43.20it/s]global step 540000, trans_decision ep_re 975.9681043348759

{"global_step": 540000, "eval_re": [2262.5895558335087, 456.190867139883, 
71.45419743611318, 2248.859147232503, 175.98617855958437, 753.9390380020517, 
1316.8991704343016, 1442.1755114341458, 323.922142310952, 707.665234965716], 
"eval_len": [1000, 246, 50, 1000, 83, 1000, 1000, 1000, 146, 327]}

 55%|█████▍    | 549999/1000000 [5:00:04<2:52:14, 43.54it/s]global step 550000, trans_decision ep_re 1979.0528626042603

{"global_step": 550000, "eval_re": [2476.0933637171893, 218.94016974594422, 
2044.0703191010114, 1840.9959535877076, 2324.9290722802284, 2535.0699570368934, 
1747.8816380550825, 2585.3321301894, 1454.3097686901885, 2562.906253638956], 
"eval_len": [1000, 109, 819, 772, 1000, 918, 716, 1000, 652, 1000]}

 56%|█████▌    | 559995/1000000 [5:05:34<2:48:27, 43.53it/s]global step 560000, trans_decision ep_re 2008.3855644474352

{"global_step": 560000, "eval_re": [2283.078560132287, 2352.2756200392864, 
1246.0989371781318, 2150.5777730074874, 924.9037132000782, 2046.655304316588, 
2397.6139388899123, 2341.956404063327, 2133.0759116932213, 2207.619481954032], 
"eval_len": [1000, 1000, 1000, 1000, 457, 869, 1000, 1000, 931, 1000]}

 57%|█████▋    | 569995/1000000 [5:11:14<2:45:25, 43.32it/s]global step 570000, trans_decision ep_re 1421.866365589121

{"global_step": 570000, "eval_re": [2543.224078696407, 2439.4120985323375, 
2348.26533051616, 755.5631261572031, 1066.5147707777649, 2538.5159126157323, 
365.86728167006385, 1257.0410125629032, 91.13238940341607, 813.1276549592232], 
"eval_len": [1000, 1000, 1000, 364, 431, 1000, 173, 1000, 54, 372]}

 58%|█████▊    | 579995/1000000 [5:16:34<2:41:18, 43.40it/s]global step 580000, trans_decision ep_re 1496.1095256725007

{"global_step": 580000, "eval_re": [92.97926829914078, 2182.4757236730834, 
1900.422165817973, 757.6281974203386, 935.2554809242309, 2421.254383696896, 
476.8608385290222, 1229.559532268113, 2381.1842200723795, 2583.4754460238305], 
"eval_len": [60, 1000, 814, 326, 391, 1000, 188, 519, 1000, 1000]}

 59%|█████▉    | 589995/1000000 [5:21:54<2:37:41, 43.34it/s]global step 590000, trans_decision ep_re 1755.1662806587897

{"global_step": 590000, "eval_re": [178.16399074826816, 2389.0472056810627, 
2326.9666599747156, 2300.845986740428, 2499.3044869585797, 2256.7607704510474, 
476.87475998049376, 931.279173095807, 2566.6901337213244, 1625.7296392361707], 
"eval_len": [129, 1000, 1000, 1000, 1000, 1000, 267, 424, 1000, 840]}

 60%|█████▉    | 599995/1000000 [5:27:24<2:33:18, 43.48it/s]global step 600000, trans_decision ep_re 1636.5480843287385

{"global_step": 600000, "eval_re": [2214.1820172778703, 2725.5658227581043, 
2110.7504733819624, 1296.2119150938431, 328.5636156540728, 2332.6153047634875, 
225.184172363983, 2444.720111078572, 162.7851781275782, 2524.9022327879125], 
"eval_len": [941, 1000, 871, 558, 1000, 1000, 104, 1000, 79, 1000]}

 61%|██████    | 609995/1000000 [5:32:54<2:29:06, 43.59it/s]global step 610000, trans_decision ep_re 1302.3495808827454

{"global_step": 610000, "eval_re": [690.7632047475481, 1367.204023233251, 
397.0204101238232, 1646.9714941107547, 150.9075443952498, 1137.8573258441081, 
2463.871915849598, 2427.6723405583557, 1571.1251980140407, 1170.1023519507257], 
"eval_len": [1000, 622, 220, 626, 76, 523, 1000, 1000, 653, 500]}

 62%|██████▏   | 619999/1000000 [5:38:14<2:25:11, 43.62it/s]global step 620000, trans_decision ep_re 2329.676087486713

{"global_step": 620000, "eval_re": [1998.691236271249, 2497.7866652978482, 
2544.45966073231, 2310.1673955215733, 2639.0433764972067, 2452.336836354701, 
1690.5400910342807, 2433.7329283538734, 2251.9196677021073, 2478.0830171019875],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 684, 1000, 869, 1000]}

 63%|██████▎   | 629995/1000000 [5:43:44<2:21:09, 43.69it/s]global step 630000, trans_decision ep_re 1782.4216963636086

{"global_step": 630000, "eval_re": [2259.709211088391, 830.2204352062859, 
2411.94769007307, 384.3646530766869, 2632.7838565245156, 1641.5517888201896, 
2586.0886843359795, 100.13073330658946, 2566.368748159044, 2411.051163045332], 
"eval_len": [1000, 308, 1000, 170, 1000, 702, 1000, 53, 1000, 1000]}

 64%|██████▍   | 639995/1000000 [5:49:14<2:18:14, 43.40it/s]global step 640000, trans_decision ep_re 2185.5438674305237

{"global_step": 640000, "eval_re": [1110.4608808655287, 2305.449077367942, 
2142.557696003679, 2184.9127749073596, 2407.5853175890807, 2314.562307658165, 
2344.7283168612994, 2292.1289573797762, 2457.8959279826004, 2295.1574176898084],
"eval_len": [497, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649995/1000000 [5:54:54<2:13:34, 43.67it/s]global step 650000, trans_decision ep_re 2149.9446391083648

{"global_step": 650000, "eval_re": [2374.403607252964, 2614.7274108637666, 
2013.1152955190694, 2531.3467389687444, 283.1868717815079, 2368.801030160189, 
2369.761350044205, 2266.006067669881, 2260.94711546305, 2417.150903360269], 
"eval_len": [1000, 1000, 1000, 1000, 136, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659995/1000000 [6:00:25<2:11:12, 43.19it/s]global step 660000, trans_decision ep_re 1979.7704009338281

{"global_step": 660000, "eval_re": [2448.421854894092, 2231.776185339124, 
2573.413202089128, 2252.323121017134, 2289.351420026353, 2138.8553327197765, 
1400.1207357684802, 2250.724260740092, 1277.7332016512664, 934.9846950928317], 
"eval_len": [1000, 923, 1000, 1000, 1000, 913, 635, 1000, 550, 1000]}

 67%|██████▋   | 669995/1000000 [6:05:55<2:06:08, 43.60it/s]global step 670000, trans_decision ep_re 1069.6704901949877

{"global_step": 670000, "eval_re": [1063.8455516115296, 1571.8652718431424, 
1842.1879313985642, 2000.7592507029008, 1668.5343863377882, 347.0785973959297, 
917.3216697106769, -148.40021313341035, 274.2916366352686, 1159.220819447487], 
"eval_len": [444, 1000, 855, 895, 759, 224, 1000, 1000, 130, 576]}

 68%|██████▊   | 679995/1000000 [6:11:25<2:01:32, 43.88it/s]global step 680000, trans_decision ep_re 1685.2499604765974

{"global_step": 680000, "eval_re": [285.1812164305503, 2245.509846862018, 
1298.292207622801, 2326.6813617321527, 2463.741057018821, 2422.606844469826, 
2209.3662594170974, 261.4266057675817, 927.0435060913743, 2412.6506993537496], 
"eval_len": [122, 1000, 603, 1000, 1000, 1000, 1000, 120, 420, 1000]}

 69%|██████▉   | 689995/1000000 [6:16:55<1:58:20, 43.66it/s]global step 690000, trans_decision ep_re 1796.3961159001287

{"global_step": 690000, "eval_re": [2226.1235269022127, 2077.444702352734, 
370.9148869007182, 2223.799568564733, 1121.1369772428627, 2146.348982619341, 
2320.289161525051, 2195.94191710487, 1114.029643975533, 2167.9317918132324], 
"eval_len": [1000, 1000, 199, 1000, 517, 1000, 1000, 1000, 539, 1000]}

 70%|██████▉   | 699995/1000000 [6:22:25<1:54:23, 43.71it/s]global step 700000, trans_decision ep_re 1960.0668212977994

{"global_step": 700000, "eval_re": [2027.6100027324392, 2208.0667013486513, 
1228.3018274697768, 2371.080685080326, 1521.396794261868, 1787.6368893157257, 
2374.215748675846, 2383.2606232223957, 2324.684112459405, 1374.4148284115631], 
"eval_len": [1000, 1000, 567, 1000, 657, 778, 1000, 1000, 1000, 1000]}

 71%|███████   | 709995/1000000 [6:27:55<1:51:37, 43.30it/s]global step 710000, trans_decision ep_re 1580.385965095284

{"global_step": 710000, "eval_re": [1097.9139140555887, 2312.8298929507455, 
2425.9800763212556, 1397.093207933789, 2276.023955717689, 1236.6669412655685, 
2339.4088786844895, 1747.4706114176597, 697.4063029279981, 273.06586967805475], 
"eval_len": [425, 1000, 1000, 571, 1000, 1000, 1000, 684, 287, 144]}

 72%|███████▏  | 719995/1000000 [6:33:25<1:47:08, 43.56it/s]global step 720000, trans_decision ep_re 2250.260651352374

{"global_step": 720000, "eval_re": [1670.1414280032068, 2415.9337819803627, 
2252.9079809664468, 2294.6647003315215, 2369.048251830887, 2390.5461206417444, 
2265.0111374052503, 2487.3490493842096, 2022.3165387144982, 2334.687524265616], 
"eval_len": [751, 1000, 1000, 1000, 1000, 1000, 992, 1000, 1000, 1000]}

 73%|███████▎  | 729996/1000000 [6:38:55<1:42:54, 43.73it/s]global step 730000, trans_decision ep_re 1856.1902577704914

{"global_step": 730000, "eval_re": [496.64316713698923, 2166.3874070790744, 
2175.2219923258463, 1954.5077568517768, 2256.984500333607, 1369.6434623953762, 
1141.3883935388105, 2317.914385044145, 2489.5808683686164, 2193.6306446306735], 
"eval_len": [236, 1000, 1000, 772, 1000, 569, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739995/1000000 [6:44:25<1:40:34, 43.09it/s]global step 740000, trans_decision ep_re 1901.7978252025828

{"global_step": 740000, "eval_re": [2318.700690560647, 1091.4761058680065, 
2243.7185583062924, 1682.7491248733543, 1585.0666996004043, 2705.355428724915, 
989.3228213962043, 2547.3197944294284, 2293.7967279607283, 1560.4723003058477], 
"eval_len": [1000, 1000, 1000, 1000, 590, 1000, 364, 1000, 1000, 609]}

 75%|███████▍  | 749995/1000000 [6:49:55<1:35:50, 43.47it/s]global step 750000, trans_decision ep_re 1441.4319468878393

{"global_step": 750000, "eval_re": [2197.4943914392607, 484.5501746104045, 
2203.495538171133, 356.6720336234186, 2214.265865169903, 474.5994250291443, 
2294.822988091938, 2321.7976824666634, 1740.6969307127642, 125.92443956376528], 
"eval_len": [1000, 192, 1000, 169, 1000, 248, 1000, 1000, 1000, 82]}

 76%|███████▌  | 759995/1000000 [6:55:25<1:31:20, 43.79it/s]global step 760000, trans_decision ep_re 1264.4418573981193

{"global_step": 760000, "eval_re": [1054.782002043387, 1168.9244864255213, 
2255.3147815398193, 600.2442468720942, 1110.4862808026944, 953.41065610856, 
1552.1955370916567, 427.45222998495984, 2388.831355977817, 1132.776997134685], 
"eval_len": [1000, 568, 1000, 269, 459, 1000, 1000, 176, 1000, 486]}

 77%|███████▋  | 769995/1000000 [7:00:45<1:28:10, 43.47it/s]global step 770000, trans_decision ep_re 2275.870653729017

{"global_step": 770000, "eval_re": [2390.658065644718, 2386.9287571427, 
1569.8136920776728, 2570.3789034676206, 2277.6289249682427, 2466.362958923036, 
2404.394787206298, 2092.490149327748, 2605.6668137807983, 1994.3834847513365], 
"eval_len": [1000, 1000, 634, 1000, 1000, 1000, 1000, 780, 1000, 1000]}

 78%|███████▊  | 779995/1000000 [7:06:25<1:24:19, 43.49it/s]global step 780000, trans_decision ep_re 1909.0642207169033

{"global_step": 780000, "eval_re": [2406.205494178276, 2327.7940869879153, 
2180.2270032958745, 223.6202286104502, 861.6976144834837, 1865.3019470586048, 
2623.4537466818033, 2239.3087154306313, 2249.5378701958934, 2113.4955002460993],
"eval_len": [1000, 946, 924, 143, 325, 743, 1000, 1000, 1000, 917]}

 79%|███████▉  | 789995/1000000 [7:11:55<1:20:18, 43.58it/s]global step 790000, trans_decision ep_re 2098.287685997234

{"global_step": 790000, "eval_re": [2158.739411647076, 2045.5110727398783, 
2266.55608257788, 2333.557894118611, 2299.880982083642, 2138.655121390009, 
2264.059097126739, 2173.4352782099063, 1260.888945365304, 2041.5929747132989], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 507, 1000]}

 80%|███████▉  | 799995/1000000 [7:17:25<1:17:19, 43.11it/s]global step 800000, trans_decision ep_re 1659.3055408592784

{"global_step": 800000, "eval_re": [1471.1429118925419, 879.3647678814735, 
2206.47177473475, 2036.2865795976875, 1686.4225964367643, 1067.9275506083015, 
1968.934167550758, 2208.9003849163137, 2293.3760850739227, 774.2285899002699], 
"eval_len": [743, 429, 1000, 1000, 801, 511, 1000, 1000, 1000, 382]}

 81%|████████  | 809995/1000000 [7:22:55<1:12:36, 43.61it/s]global step 810000, trans_decision ep_re 1554.8332778722256

{"global_step": 810000, "eval_re": [495.128486026308, 322.4952994510239, 
266.3284901722112, 1775.0250855075776, 231.07016587928652, 2343.1813588943933, 
2674.837665495519, 2609.569609290849, 2387.434687823637, 2443.26193018145], 
"eval_len": [226, 155, 139, 710, 97, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819995/1000000 [7:28:15<1:09:00, 43.47it/s]global step 820000, trans_decision ep_re 1636.9320679887205

{"global_step": 820000, "eval_re": [2127.153323880601, 2160.088127170912, 
1711.3877104680287, 2298.479739603538, 616.936738686043, 2277.775249854572, 
1555.6177811200816, 1169.1179773503948, 1665.838427866392, 786.9256038866433], 
"eval_len": [1000, 1000, 758, 1000, 262, 1000, 623, 495, 743, 327]}

 83%|████████▎ | 829995/1000000 [7:33:45<1:05:20, 43.37it/s]global step 830000, trans_decision ep_re 2091.181074011821

{"global_step": 830000, "eval_re": [2288.3739866195724, 2269.890468231567, 
2011.3208018270604, 1664.706408725175, 2151.8063143512813, 2024.9940881382693, 
2162.6765021642136, 1927.9011726389706, 2305.945656617324, 2104.1953408047734], 
"eval_len": [1000, 1000, 1000, 739, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839995/1000000 [7:39:15<1:01:09, 43.61it/s]global step 840000, trans_decision ep_re 1835.6541866729349

{"global_step": 840000, "eval_re": [1852.8687528766777, 2351.913938373769, 
2198.3458004659165, 818.894066122979, 2546.550705619154, 2520.430435648176, 
2396.2561779368293, 133.4961145171836, 1204.2025057999097, 2333.583369368753], 
"eval_len": [686, 1000, 862, 347, 1000, 1000, 1000, 95, 529, 1000]}

 85%|████████▍ | 849995/1000000 [7:44:45<57:16, 43.65it/s]global step 850000, trans_decision ep_re 2157.7165017388625

{"global_step": 850000, "eval_re": [2363.738798761042, 1190.5188692788222, 
1010.1056308720661, 2482.987691235252, 2470.810325400605, 2507.617561216706, 
2103.3851498092467, 2509.9139304762707, 2472.6113935541243, 2465.4756667844886],
"eval_len": [1000, 527, 387, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859995/1000000 [7:50:15<53:23, 43.70it/s]global step 860000, trans_decision ep_re 1637.978921909397

{"global_step": 860000, "eval_re": [1992.2888061699337, 1581.571054145457, 
917.5944123815265, 2184.2067872186135, 621.664765419143, 2144.458318474821, 
2112.4452781806926, 1918.5443228177649, 2132.6235938792524, 774.3918804067665], 
"eval_len": [883, 757, 434, 1000, 266, 1000, 1000, 1000, 1000, 372]}

 87%|████████▋ | 869995/1000000 [7:55:45<49:54, 43.42it/s]global step 870000, trans_decision ep_re 1866.386815776359

{"global_step": 870000, "eval_re": [2220.438251400256, 2253.332087241502, 
2433.9545376357178, 2265.5269314158577, 2000.1145623682664, 2225.4149505133196, 
423.7518254938551, 2475.4859928246274, 804.1419527409449, 1561.7070661292466], 
"eval_len": [1000, 1000, 1000, 1000, 903, 1000, 209, 1000, 1000, 1000]}

 88%|████████▊ | 879995/1000000 [8:01:15<45:56, 43.53it/s]global step 880000, trans_decision ep_re 1898.7514837568863

{"global_step": 880000, "eval_re": [467.2829473819538, 80.43684663300097, 
1936.711239015442, 2376.0562526044964, 2479.0748731569292, 2096.3896454402493, 
2319.182882489059, 2421.5041460617144, 2632.539250010998, 2178.3367547750163], 
"eval_len": [201, 54, 820, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889995/1000000 [8:06:35<1:05:21, 28.05it/s]global step 890000, trans_decision ep_re 1986.734885021336

{"global_step": 890000, "eval_re": [2412.73832187253, 2550.867252828, 
2539.102512051825, 1876.6316877890076, 949.4451987334259, 1563.1216000639538, 
2320.833201612137, 2112.9306171074118, 1146.6218989621839, 2395.0565591928835], 
"eval_len": [1000, 1000, 1000, 751, 447, 691, 1000, 958, 1000, 1000]}

 90%|████████▉ | 899995/1000000 [8:12:15<38:21, 43.46it/s]global step 900000, trans_decision ep_re 1750.508671620183

{"global_step": 900000, "eval_re": [866.7601954287586, 2375.359648479721, 
1022.4079015351795, 1132.3602009437263, 2417.245951000867, 2374.0169507410556, 
398.4817876086665, 2148.0880680408836, 2232.937697755392, 2537.42831466758], 
"eval_len": [368, 1000, 445, 1000, 1000, 1000, 158, 1000, 1000, 1000]}

 91%|█████████ | 909999/1000000 [8:17:35<34:20, 43.68it/s]global step 910000, trans_decision ep_re 1275.7434261442352

{"global_step": 910000, "eval_re": [375.72649160360504, 2204.475269795234, 
2509.6912580462613, 1157.8484678524487, 1764.0216608071494, 650.1527138950265, 
374.55766984090167, 2574.639654329693, 689.8249446773672, 456.4961305946631], 
"eval_len": [183, 933, 1000, 516, 629, 294, 160, 1000, 280, 192]}

 92%|█████████▏| 919995/1000000 [8:22:55<30:34, 43.61it/s]global step 920000, trans_decision ep_re 1732.2240541985968

{"global_step": 920000, "eval_re": [752.2211492634866, 2283.275389615062, 
2321.923513235163, 519.3302874739712, 2244.951074951164, 2296.4653616793266, 
2344.323167801676, 2120.820623742852, 2124.855299274844, 314.0746749484236], 
"eval_len": [334, 1000, 1000, 245, 1000, 1000, 1000, 1000, 1000, 158]}

 93%|█████████▎| 929995/1000000 [8:28:25<26:51, 43.45it/s]global step 930000, trans_decision ep_re 2119.0683953664907

{"global_step": 930000, "eval_re": [2468.0547363226992, 2460.334686778901, 
962.4519008659332, 1472.1909318933206, 2355.449293155423, 1657.1958636938637, 
2503.4545885427874, 2383.7882000611703, 2365.8077115295578, 2561.9560408212533],
"eval_len": [1000, 1000, 390, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939995/1000000 [8:34:05<22:54, 43.66it/s]global step 940000, trans_decision ep_re 1855.051986994741

{"global_step": 940000, "eval_re": [1491.9572085780305, 2030.659352821841, 
1725.335392410931, 2410.613315955452, 2448.2480831291555, 859.3472197655897, 
2530.325222943335, 2192.293458210593, 2396.660138631806, 465.0804775006772], 
"eval_len": [1000, 918, 1000, 1000, 1000, 428, 1000, 1000, 1000, 222]}

 95%|█████████▍| 949995/1000000 [8:39:35<18:57, 43.95it/s]global step 950000, trans_decision ep_re 1904.4520113433118

{"global_step": 950000, "eval_re": [2074.864036397447, 405.30021783696174, 
2320.8303830193754, 2386.288300322556, 1340.985415463234, 2324.3926190413904, 
2220.2362382375522, 1383.3768193463243, 2268.649827282131, 2319.596256486147], 
"eval_len": [1000, 192, 1000, 1000, 541, 1000, 1000, 598, 1000, 1000]}

 96%|█████████▌| 959995/1000000 [8:45:05<24:31, 27.19it/s]global step 960000, trans_decision ep_re 1939.8448644338769

{"global_step": 960000, "eval_re": [1455.4697346132064, 1220.0467864576058, 
2500.378460307323, 2618.4602630750933, 1194.9040058430644, 1702.3631766219635, 
2441.7466436978016, 1396.2989414450217, 2359.6813414740313, 2509.0992908036555],
"eval_len": [582, 461, 1000, 1000, 483, 690, 927, 652, 1000, 1000]}

 97%|█████████▋| 969995/1000000 [8:50:25<11:23, 43.88it/s]global step 970000, trans_decision ep_re 1805.6897969611423

{"global_step": 970000, "eval_re": [2425.957248790598, 259.3360834899925, 
721.2472418403788, 2448.7982685757224, 1911.3126499984864, 2379.740858115217, 
2490.5915345548583, 2228.788067022276, 2447.0606085168015, 744.0654087070918], 
"eval_len": [1000, 115, 331, 1000, 775, 1000, 1000, 1000, 1000, 376]}

 98%|█████████▊| 979995/1000000 [8:55:55<07:37, 43.68it/s]global step 980000, trans_decision ep_re 1459.596265292928

{"global_step": 980000, "eval_re": [559.9646521925848, 1105.6332707077524, 
2383.2270738029233, 366.51540566074874, 310.73036341653034, 449.023604911929, 
2266.071526069954, 2273.7535882173443, 2403.794199406812, 2477.2489685427017], 
"eval_len": [283, 494, 1000, 149, 155, 210, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989995/1000000 [9:01:25<03:50, 43.40it/s]global step 990000, trans_decision ep_re 1491.480050915631

{"global_step": 990000, "eval_re": [1597.5497116779923, 1106.527674978541, 
27.89272465889129, 2141.8712953365916, 2249.1871878767906, 616.3846838378176, 
1061.9123978363468, 2042.9583993955632, 1778.5644946789062, 2291.9519388788694],
"eval_len": [1000, 510, 27, 1000, 1000, 307, 1000, 1000, 905, 1000]}

100%|█████████▉| 999996/1000000 [9:06:55<00:00, 43.60it/s]global step 1000000, trans_decision ep_re 939.3627330784357

{"global_step": 1000000, "eval_re": [1149.6942433071126, 645.2836098913256, 
2495.70947476468, 122.88362069140555, 360.9134446130605, 220.09693556790478, 
972.0918889825653, 241.9640571090485, 826.6537355062957, 2358.3363203509575], 
"eval_len": [1000, 1000, 1000, 68, 155, 113, 429, 95, 1000, 1000]}

100%|██████████| 1000000/1000000 [9:07:05<00:00, 30.46it/s]
