
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 2,
    'delayspec': 'ExtremeClogL1U23::markov(ord(15,1), ord(3,5,3,shift=22), 
[[124, 1], [1, 19]])'
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9998/1000000 [02:40<5:52:13, 46.84it/s]global step 10000, trans_decision ep_re -84.31250964687997

{"global_step": 10000, "eval_re": [-245.00598401341958, -143.48144464951332, 
621.3395786601272, 59.71697814392001, -71.2368029803197, -195.85692277014863, 
-293.4654468015712, -270.2681072107, -372.8203169611195, 67.95337211394508], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19995/1000000 [08:00<6:02:16, 45.09it/s]global step 20000, trans_decision ep_re 1140.2389339437873

{"global_step": 20000, "eval_re": [1383.0297170951198, 1347.1733543304151, 
1231.2553206525554, 1225.921837704227, 280.173740323276, 1564.091270066872, 
69.03783703544802, 1824.4059219930323, 844.8049943976198, 1632.4953458393043], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29995/1000000 [13:20<5:54:11, 45.64it/s]global step 30000, trans_decision ep_re 1864.608164556835

{"global_step": 30000, "eval_re": [2226.778953037893, 948.71654180678, 
2028.1105923326377, 2174.4553259718778, 2539.2014275150736, 724.6836031322073, 
2206.872237464945, 2052.3645673587016, 2027.3359430732037, 1717.5624538750264], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39995/1000000 [18:40<5:55:18, 45.03it/s]global step 40000, trans_decision ep_re 2676.6005975143826

{"global_step": 40000, "eval_re": [2804.245704087005, 2592.7733466436257, 
2447.034792183398, 2886.2031046903708, 1900.2038877304428, 3272.0744778558537, 
2758.827932339099, 2656.3427584186675, 2562.1700332739847, 2886.129937921374], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49995/1000000 [24:00<5:42:10, 46.27it/s]global step 50000, trans_decision ep_re 3008.954444869927

{"global_step": 50000, "eval_re": [2628.8598948317767, 3270.7985229928277, 
3198.1285331407853, 3395.163722643249, 2655.562524392948, 2789.686078791547, 
2945.0584905119345, 3534.2380449424863, 2970.8028048346537, 2701.2458316170614],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59995/1000000 [29:20<7:23:55, 35.29it/s]global step 60000, trans_decision ep_re 3295.3211089313686

{"global_step": 60000, "eval_re": [3645.5236087709422, 3726.544995406832, 
3550.105696726047, 3022.7862795872097, 3462.909218895719, 3187.9881769620147, 
3888.2746373814566, 3774.5123100619026, 3651.1570815809623, 1043.4090839406003],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69996/1000000 [34:50<5:34:19, 46.36it/s]global step 70000, trans_decision ep_re 3715.1322851705686

{"global_step": 70000, "eval_re": [3328.4563181731, 3345.6052477411126, 
4054.4288591491404, 4379.6125069845075, 3853.9440346412703, 3832.625954515526, 
3671.720248867157, 3192.6756986024056, 3491.959696757298, 4000.2942862741625], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79998/1000000 [40:10<5:33:44, 45.94it/s]global step 80000, trans_decision ep_re 3135.4706035913596

{"global_step": 80000, "eval_re": [3497.597790845816, 4029.643092932252, 
2988.0951427190553, 3532.3412709451836, 4252.818141631251, 2603.571720071694, 
2720.402694691783, 3421.0722006397064, 847.4104915664907, 3461.7534898703675], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89996/1000000 [45:20<5:28:11, 46.21it/s]global step 90000, trans_decision ep_re 3768.7535149152127

{"global_step": 90000, "eval_re": [4574.260982947249, 3887.855174694112, 
1576.296685106331, 3865.9237177906075, 4662.518647293308, 4051.9881927613988, 
4064.1869168235853, 3579.415162352801, 4303.8600161937575, 3121.229653188979], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99999/1000000 [50:40<5:21:00, 46.73it/s]global step 100000, trans_decision ep_re 3679.9420863683467

{"global_step": 100000, "eval_re": [518.4461120775602, 3844.1120820318943, 
4255.508785394219, 4637.254371669702, 4204.289660539114, 4475.928561789622, 
3650.6433942760086, 3417.5093224324064, 4501.291762864777, 3294.4368106081647], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109998/1000000 [55:40<5:09:01, 48.00it/s]global step 110000, trans_decision ep_re 4726.58185089881

{"global_step": 110000, "eval_re": [4862.701079136367, 4932.048439012588, 
4219.004333154244, 4755.314656472055, 5277.288750491246, 4659.715263934223, 
4863.295330703344, 4889.057411165919, 4805.7399418592995, 4001.653303058828], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119999/1000000 [1:00:30<5:05:13, 48.05it/s]global step 120000, trans_decision ep_re 4704.121572084476

{"global_step": 120000, "eval_re": [4596.685911100493, 3848.52304212208, 
5360.81533123959, 4901.888010181073, 4794.397513562613, 5321.690249162067, 
4605.743027275304, 3824.6286321955217, 5138.621774340361, 4648.222229665665], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129999/1000000 [1:05:50<5:20:13, 45.28it/s]global step 130000, trans_decision ep_re 4069.4982030793335

{"global_step": 130000, "eval_re": [3808.8338799600742, 3738.9292419052294, 
3618.576336070136, 4119.116724030373, 4444.201325033088, 3886.426341141263, 
4436.492015870896, 4053.6006125698364, 4565.657584664924, 4023.147969547514], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139998/1000000 [1:11:10<5:05:22, 46.94it/s]global step 140000, trans_decision ep_re 4156.413210550522

{"global_step": 140000, "eval_re": [4755.812038990325, 3808.5119707702497, 
5105.243007153254, 4604.184467997737, 5100.149611172825, 533.5004441447281, 
5087.46277000733, 3712.501576836811, 3761.5129846179448, 5095.253233814022], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149996/1000000 [1:16:21<4:57:13, 47.66it/s]global step 150000, trans_decision ep_re 4380.104499751954

{"global_step": 150000, "eval_re": [5709.797262163863, 4666.030591319458, 
5684.081912233241, 3971.4581523152874, 4873.286373658514, 4943.638047969262, 
4771.476123212694, 4039.55196276255, 4974.122892362077, 167.601679522589], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159998/1000000 [1:21:31<6:18:47, 36.96it/s]global step 160000, trans_decision ep_re 4592.186697933646

{"global_step": 160000, "eval_re": [5095.223622610002, 6155.9288559695215, 
1916.9739648079528, 4848.337372834698, 4262.3711420364925, 5056.809778366546, 
4839.812002480071, 4282.465802870775, 4953.494019018566, 4510.450418341835], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169998/1000000 [1:26:51<5:11:47, 44.37it/s]global step 170000, trans_decision ep_re 4701.537667256474

{"global_step": 170000, "eval_re": [4732.64957008869, 4905.115754829991, 
4423.694180274392, 4313.244760172964, 5176.202897530281, 4676.428846164697, 
4565.614580305604, 3694.8067311852274, 5134.587058633797, 5393.032293379101], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179997/1000000 [1:32:01<5:45:09, 39.60it/s]global step 180000, trans_decision ep_re 4515.065176641513

{"global_step": 180000, "eval_re": [4733.436266573859, 4982.763111678902, 
4511.667026262785, 4469.839452824712, 4547.212798691973, 4892.241559444701, 
3916.0971648610102, 4074.2189802885387, 4368.8300209750605, 4654.345384813601], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189998/1000000 [1:37:11<4:44:28, 47.46it/s]global step 190000, trans_decision ep_re 4562.6638877815285

{"global_step": 190000, "eval_re": [4749.9371644921175, 171.88002243131507, 
5674.150142829586, 5722.127117249758, 4359.87851774461, 5061.214918229407, 
4726.13376409867, 4634.311015020518, 4645.3758627278585, 5881.630352991443], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199999/1000000 [1:42:21<4:51:29, 45.74it/s]global step 200000, trans_decision ep_re 5625.0557139369275

{"global_step": 200000, "eval_re": [5605.49778530739, 5374.163078963596, 
5233.656937378425, 6474.71532667228, 6159.014886781682, 5379.49243355852, 
5917.862645274697, 4523.151482664483, 5558.11742098516, 6024.885141783044], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209995/1000000 [1:47:31<4:47:28, 45.80it/s]global step 210000, trans_decision ep_re 1714.5579288733468

{"global_step": 210000, "eval_re": [2602.6310177664773, 1759.4461852154122, 
139.5284479746631, 622.1041934510006, 1518.1659777734546, 1833.7649413746353, 
1673.5407247403307, 2650.574567629091, 2073.892436380847, 2271.9307964275595], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219995/1000000 [1:52:41<4:39:35, 46.50it/s]global step 220000, trans_decision ep_re 203.37117559490886

{"global_step": 220000, "eval_re": [361.7781399978074, 295.84314908305294, 
55.98490491459912, 4.549549280333123, 310.865440035421, 141.49941008688796, 
291.42748698114434, 62.35258089719059, 462.2807492602759, 47.13034541237601], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229995/1000000 [1:57:51<4:40:55, 45.68it/s]global step 230000, trans_decision ep_re 1022.3433577123518

{"global_step": 230000, "eval_re": [951.3369132723996, 1501.0877609804024, 
353.34840427566985, 1053.9690208749146, 992.2610258767263, 825.0276656422874, 
1845.004584964566, 166.41204745998695, 1077.8260522362027, 1457.1601015403605], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239995/1000000 [2:03:11<4:33:11, 46.37it/s]global step 240000, trans_decision ep_re 733.329479762214

{"global_step": 240000, "eval_re": [514.3168135204166, 214.96703993349738, 
737.3595141274618, 905.5896666706342, 1565.1521218138116, 1403.363231289338, 
93.79454564399457, 281.8542861312264, 1112.7624554739257, 504.13512301783373], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249998/1000000 [2:08:21<4:22:11, 47.67it/s]global step 250000, trans_decision ep_re 163.28565933678962

{"global_step": 250000, "eval_re": [264.39055610402653, 219.88466055643147, 
235.01573750830795, -16.44371772218331, 10.627294226387539, -34.2791815462134, 
108.83302615782917, 610.5303888218206, 51.34647589330933, 182.9513533681803], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259997/1000000 [2:13:31<4:25:49, 46.40it/s]global step 260000, trans_decision ep_re 674.312429999746

{"global_step": 260000, "eval_re": [657.1932817102992, 1317.0313341511876, 
1519.2395674315962, 214.65335278591837, 405.08838135861265, 612.6877763257306, 
55.85017850326908, 664.4223609013364, 498.45185473479273, 798.5062120947175], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269999/1000000 [2:18:51<4:16:22, 47.46it/s]global step 270000, trans_decision ep_re 206.65714726277875

{"global_step": 270000, "eval_re": [-72.13937101954208, -33.15680756339535, 
243.1962399399324, 185.70015594001194, 331.75246028798614, 208.0659647184066, 
84.05741375201555, 640.6619580604056, 403.5672102676213, 74.86624824434526], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279997/1000000 [2:24:01<4:22:30, 45.71it/s]global step 280000, trans_decision ep_re 123.8293005184917

{"global_step": 280000, "eval_re": [175.87474007344017, 370.7248543446946, 
6.733310307780652, 56.03129261393934, 79.89175189698986, 63.666258594606724, 
63.582527754766865, 56.47906031915542, 303.581133926935, 61.72807535260826], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289999/1000000 [2:29:11<4:15:58, 46.23it/s]global step 290000, trans_decision ep_re 239.3163936253514

{"global_step": 290000, "eval_re": [326.0740204180172, 244.93635851374341, 
59.2021493195746, 645.8262006956892, 125.66317699301418, -68.24277069026299, 
203.700371563165, 66.02697547053896, 123.57772412549633, 666.399729844538], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299995/1000000 [2:34:21<4:14:18, 45.88it/s]global step 300000, trans_decision ep_re 1024.817702021467

{"global_step": 300000, "eval_re": [1570.8216274553913, 1499.6177273550188, 
667.5008900759939, 421.9046714583408, 1006.1011544373158, 1115.1688480184666, 
1560.9131090023957, 567.6558407645473, 145.67290598002805, 1692.8202456671738], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309996/1000000 [2:39:41<4:09:20, 46.12it/s]global step 310000, trans_decision ep_re 429.95042269634484

{"global_step": 310000, "eval_re": [11.124290881823184, 185.10688292986208, 
187.8276129349789, 586.0648796507362, 93.24834295659055, 25.46708165717331, 
359.47016044661376, 207.49201275264588, 1201.070391518817, 1442.6325712342073], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319997/1000000 [2:44:51<4:12:52, 44.82it/s]global step 320000, trans_decision ep_re 374.5041170630921

{"global_step": 320000, "eval_re": [29.050157087168476, 724.3947750029661, 
218.83071054163838, 35.50384847931423, 337.5130281646064, 787.6992501720065, 
209.9601830950502, 1038.1583137275047, -18.15132395181347, 382.0822283124794], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329998/1000000 [2:50:11<3:54:53, 47.54it/s]global step 330000, trans_decision ep_re 813.175928680309

{"global_step": 330000, "eval_re": [458.0852297378003, 1576.5965105754742, 
372.13305851200903, 1286.9982221866103, 757.4313808197236, 987.9915294660441, 
1359.6204848067716, 161.64476144510897, 1295.7301910707724, 
-124.47208181722482], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

 34%|███▍      | 339997/1000000 [2:55:21<3:58:27, 46.13it/s]global step 340000, trans_decision ep_re 305.412228605631

{"global_step": 340000, "eval_re": [259.3107221512768, 104.67190609632783, 
396.17644905768617, 603.5996521654483, 51.87204085833299, 199.43564726118512, 
438.4781723587935, 286.15093620943315, 478.51938274907855, 235.90737714874774], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349995/1000000 [3:00:41<3:53:55, 46.31it/s]global step 350000, trans_decision ep_re 508.7136049314373

{"global_step": 350000, "eval_re": [876.0434405320947, 181.04492554966265, 
72.47531110215948, 784.4634075788074, 657.6405502986847, 390.8393736718501, 
160.0788734342578, 16.870580801264406, 618.6170161526921, 1329.0625701929002], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359996/1000000 [3:05:52<3:53:47, 45.62it/s]global step 360000, trans_decision ep_re 425.9444551982077

{"global_step": 360000, "eval_re": [205.50158646035445, 1027.4839212500226, 
791.4778992990267, 183.122917607011, 20.786544533713165, 44.26503323166623, 
358.9070280982677, 831.2227890732254, 102.60931579467129, 694.0675166341191], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369998/1000000 [3:11:12<3:39:22, 47.86it/s]global step 370000, trans_decision ep_re 456.59852241168693

{"global_step": 370000, "eval_re": [37.65662138132533, 909.7968036371027, 
120.92047178956217, 819.3046225053874, 1261.1208233359503, 710.4889268316617, 
65.77146298840874, 356.622553206591, 271.4739944375941, 12.828944003285752], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379995/1000000 [3:16:32<3:50:44, 44.78it/s]global step 380000, trans_decision ep_re 510.07094488146123

{"global_step": 380000, "eval_re": [243.59524248969592, 786.7029633961927, 
421.12863308286586, 379.43870130038954, 158.5842384421427, 657.708874783958, 
574.3327355495835, 847.9069247911659, 66.55534989756949, 964.7557850810479], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389998/1000000 [3:21:42<3:42:32, 45.68it/s]global step 390000, trans_decision ep_re 565.2182311645738

{"global_step": 390000, "eval_re": [104.5103964284737, 1039.4435456168985, 
503.20269159817445, 832.4981829607214, 43.30219377876443, 930.4754315674785, 
525.0473769570536, 173.96015505282102, 473.2284194136325, 1026.5139182717196], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [3:27:02<3:31:15, 47.33it/s]global step 400000, trans_decision ep_re 354.5866602062923

{"global_step": 400000, "eval_re": [1088.1871540266266, 1211.5011041600403, 
47.72759710930668, 128.2929166225748, 420.4501456617519, 165.3147285647219, 
-8.960264244504158, 285.63025071869686, 212.0815392994989, -4.358569855790301], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409999/1000000 [3:32:12<3:33:27, 46.07it/s]global step 410000, trans_decision ep_re 806.5755731378458

{"global_step": 410000, "eval_re": [323.1213207266688, 1590.6552704236124, 
1224.4148941428123, 1462.3513157418026, 295.52631961352995, 1366.1892070559657, 
771.3056974996692, 563.8763473257025, 177.12639525998486, 291.1889635887099], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419997/1000000 [3:37:32<3:29:32, 46.13it/s]global step 420000, trans_decision ep_re 590.6090266614266

{"global_step": 420000, "eval_re": [488.05223011565596, 614.1527346684804, 
709.2116739387202, 222.55725149113405, 756.1353457302711, 489.0462821918881, 
685.8057365113368, 771.624171919019, 79.2302763056326, 1090.2745637421278], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429995/1000000 [3:42:42<5:55:25, 26.73it/s]global step 430000, trans_decision ep_re 457.47973638359844

{"global_step": 430000, "eval_re": [727.1904458599874, 587.9046769136494, 
24.816219536002528, 190.37521686591498, 297.0100788750446, 542.4501297977483, 
54.20708595150834, 468.6934064871019, 1551.6452608980533, 130.50484265097344], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439995/1000000 [3:48:02<3:23:22, 45.89it/s]global step 440000, trans_decision ep_re 755.6718895303904

{"global_step": 440000, "eval_re": [1153.708216935049, 419.50449077106583, 
162.01519295070682, 701.1106726490502, 1127.2274027767535, 1223.406441605301, 
185.21959694571876, 1265.5404196887002, 1028.013574829318, 290.9728861522404], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449999/1000000 [3:53:12<3:17:36, 46.39it/s]global step 450000, trans_decision ep_re 802.2407523473994

{"global_step": 450000, "eval_re": [1440.517017260316, 653.8545031672853, 
1366.868809336689, 1632.6241786534204, 220.23092285137804, 556.6192246561739, 
342.23805441650325, 1036.3968614044466, 254.77157430904435, 518.2863774187376], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459995/1000000 [3:58:22<3:09:38, 47.46it/s]global step 460000, trans_decision ep_re 921.7523404077562

{"global_step": 460000, "eval_re": [1343.9204524698162, 815.1849954579392, 
448.41037026382617, 1069.4227456348096, 871.7606316854084, 540.9470978135915, 
1033.5375581422816, 1834.7181316348938, 1019.6950799611034, 239.926341013892], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469997/1000000 [4:03:22<3:01:23, 48.70it/s]global step 470000, trans_decision ep_re 849.1667236190733

{"global_step": 470000, "eval_re": [1053.9043345511336, 325.5143805895579, 
287.0627359980373, 990.4707007179579, 364.95049335191914, 2134.0384703372606, 
1329.3675788060702, 882.9111260155651, 799.1845262358407, 324.2628895873909], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479997/1000000 [4:08:22<3:01:32, 47.74it/s]global step 480000, trans_decision ep_re 973.6394526101128

{"global_step": 480000, "eval_re": [1776.7963266065556, 29.635479408164805, 
190.61176418914638, 1624.639088938664, 527.8053552303787, 1411.979885573539, 
1199.9775100900997, 835.8032752678697, 920.9785138155967, 1218.1673269811142], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489995/1000000 [4:13:22<2:50:52, 49.74it/s]global step 490000, trans_decision ep_re 744.2681497362597

{"global_step": 490000, "eval_re": [782.7181235297223, 401.3399111931061, 
1751.7113417286105, 166.4694393371655, 1634.4490149731023, 411.00622246546476, 
343.7849818597098, 119.64361287387203, 1544.116004675227, 287.4428447266164], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499999/1000000 [4:18:23<2:47:09, 49.85it/s]global step 500000, trans_decision ep_re 665.3198626858318

{"global_step": 500000, "eval_re": [331.4649349217225, 72.24041911495787, 
754.9178857501541, 201.5776552404207, 1484.442057768123, 58.54764265871662, 
1329.9005281262403, 775.9855219112759, 733.7541022189661, 910.367879147741], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509995/1000000 [4:23:13<2:42:08, 50.37it/s]global step 510000, trans_decision ep_re 317.5696973601829

{"global_step": 510000, "eval_re": [343.1249912388742, 432.5749522667016, 
196.3828926059592, 183.93446791693145, 733.0604412539457, 401.67531597482025, 
486.6600781190458, 306.2227673360685, 30.029349185126165, 62.031717704356105], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519997/1000000 [4:28:13<2:59:14, 44.63it/s]global step 520000, trans_decision ep_re 1003.574151487267

{"global_step": 520000, "eval_re": [1159.7217331317522, 1407.3198841808012, 
887.2834216433798, 363.55633406256715, 1238.6159510909497, 1331.3863811827214, 
1401.7269179357027, 430.5236087859241, 1301.7646424754616, 513.8426403834093], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529995/1000000 [4:33:13<2:45:54, 47.22it/s]global step 530000, trans_decision ep_re 55.38552810197626

{"global_step": 530000, "eval_re": [96.44226932060855, 65.54404359205483, 
77.80821346800482, 14.03956686417614, 131.97920228996284, 25.269119301054875, 
105.4253279824541, -29.88283094755492, 65.05437154520246, 2.175997603798883], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539999/1000000 [4:38:13<2:40:37, 47.73it/s]global step 540000, trans_decision ep_re 977.1276035977087

{"global_step": 540000, "eval_re": [1085.3267423163632, 790.2696644138936, 
785.2954972172224, 1017.0051373249887, 999.1720578116458, 805.9090712683997, 
722.8618116309152, 1386.000579887284, 963.7538208391197, 1215.6816532672572], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549999/1000000 [4:43:23<2:37:27, 47.63it/s]global step 550000, trans_decision ep_re 461.3239022799504

{"global_step": 550000, "eval_re": [61.11025921108742, 376.3861131597503, 
469.108258217801, 1723.992697191832, 723.8116432613343, 123.52091210642189, 
88.92951376983972, 601.206884296287, 271.47204354685437, 173.70069803829622], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559997/1000000 [4:48:23<2:32:20, 48.14it/s]global step 560000, trans_decision ep_re 1215.4116255889273

{"global_step": 560000, "eval_re": [1333.537227311923, 1808.8256800889485, 
192.7755919381315, 225.48977750665642, 1295.8248202994525, 1449.733706235982, 
1748.1308711037873, 1292.4394840313166, 1475.4194336516175, 1331.93966372146], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569995/1000000 [4:53:23<2:30:28, 47.63it/s]global step 570000, trans_decision ep_re -22.98093745925545

{"global_step": 570000, "eval_re": [-17.81391808898295, -36.217795875184265, 
-17.355117660758832, -30.625396163345137, -31.87289507059824, -9.15472897443985,
-35.2404119002935, -25.645601351480952, -15.836440320217024, 
-10.047069187253731], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

 58%|█████▊    | 579997/1000000 [4:58:23<2:26:20, 47.83it/s]global step 580000, trans_decision ep_re 1045.544947346773

{"global_step": 580000, "eval_re": [395.49455038821725, 1629.3553638756114, 
167.89760375255608, 1527.5445938995495, 877.0780174540125, 492.7557025804091, 
1144.1620154179134, 1839.8968101494615, 1597.6099879438173, 783.6548280061793], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589995/1000000 [5:03:33<2:22:49, 47.84it/s]global step 590000, trans_decision ep_re 498.25216883464043

{"global_step": 590000, "eval_re": [813.8691214779816, 373.5363984249965, 
198.73456478212492, 541.9474054064174, 1211.1870325899636, 563.6668589649117, 
355.6713376524466, 345.2959461129541, 313.7123809288678, 264.9006420057403], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599997/1000000 [5:08:33<2:19:36, 47.75it/s]global step 600000, trans_decision ep_re 177.6293617980741

{"global_step": 600000, "eval_re": [217.20293168571555, 73.50397173993775, 
260.3229184167047, 256.5788178726364, 143.71701507185952, 58.002484758577616, 
136.33526319429072, 171.72684600410668, 350.58917597931026, 108.3141932576016], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609999/1000000 [5:13:33<2:15:25, 48.00it/s]global step 610000, trans_decision ep_re 593.0586784006924

{"global_step": 610000, "eval_re": [1211.4801826599362, 200.26010671900818, 
432.80334567937075, 373.7845991605837, 957.9477010173894, 582.5072180756915, 
40.12771821608757, 594.6590936320383, 781.2692993323592, 755.7475195144602], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619999/1000000 [5:18:33<2:11:37, 48.12it/s]global step 620000, trans_decision ep_re 1078.608083247716

{"global_step": 620000, "eval_re": [1414.2306619456756, 694.3432926490859, 
776.5693971976644, 1601.7822777296337, 1160.2973429244043, 981.545416749331, 
1535.064595033989, 1250.2227444676028, 725.1426417181452, 646.8824620616288], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629995/1000000 [5:23:43<2:07:35, 48.33it/s]global step 630000, trans_decision ep_re 602.8965088419268

{"global_step": 630000, "eval_re": [702.6400752894621, 274.65944638717264, 
90.59053235119683, 1172.744849285052, 180.0695535812511, 1090.7399130221072, 
1151.1123180779405, 413.75773989095734, 446.83634943463517, 505.81431109949295],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639997/1000000 [5:28:43<2:05:24, 47.85it/s]global step 640000, trans_decision ep_re 1195.697226173255

{"global_step": 640000, "eval_re": [901.1855103848287, 1094.8559492963134, 
1254.5734777945693, 1369.452143839057, 1154.2733016953453, 885.7831325978874, 
1401.308718977362, 1276.2226474997283, 1245.700334713099, 1373.6170449343604], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649995/1000000 [5:33:43<2:01:45, 47.91it/s]global step 650000, trans_decision ep_re 1306.8819762428777

{"global_step": 650000, "eval_re": [1713.3919378123305, 1294.7965136538746, 
744.1679666072464, 1277.959287499234, 1585.2022448664648, 1220.7716187381982, 
1022.2096220348692, 1433.8995220211839, 1107.4627691874664, 1668.9582800079077],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659998/1000000 [5:38:43<1:58:21, 47.88it/s]global step 660000, trans_decision ep_re 358.7596124777272

{"global_step": 660000, "eval_re": [704.7343790720395, 286.93422823043045, 
331.052089701876, 66.64556707160067, 189.5660721915741, 251.42314957624635, 
311.1480529189117, 628.7111385213851, 276.76398638974166, 540.6174611034661], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669997/1000000 [5:43:43<1:54:31, 48.02it/s]global step 670000, trans_decision ep_re 626.4893831560947

{"global_step": 670000, "eval_re": [696.832827903061, 294.65394524345794, 
99.83408642552344, 511.66146547122037, 407.59066663173616, 1182.1045739121266, 
999.9417316804877, -4.2032247912137715, 1308.6237489304167, 767.8540101541312], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679996/1000000 [5:48:53<1:50:35, 48.22it/s]global step 680000, trans_decision ep_re 89.4214694217172

{"global_step": 680000, "eval_re": [-32.02502532989213, -18.875624715917812, 
25.42601933229122, 67.26330110569265, -32.91254816068824, 204.0220627027524, 
553.711665206849, 194.6744982210083, -29.251355993217008, -37.818298151706465], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689997/1000000 [5:53:53<1:47:00, 48.28it/s]global step 690000, trans_decision ep_re -24.568909753616772

{"global_step": 690000, "eval_re": [-26.151460180103236, -23.409679095495765, 
-20.960738732590354, -25.503107570693984, -27.079530220842084, 
-22.883819340947635, -21.965475760683894, -27.235978775106645, 
-27.27417181938728, -23.22513604031683], "eval_len": [1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699997/1000000 [5:58:54<1:43:39, 48.23it/s]global step 700000, trans_decision ep_re 867.9359684830431

{"global_step": 700000, "eval_re": [558.0025804644885, 221.27576365312433, 
1496.9903684589513, -25.45044494147206, 521.0217094114579, 1625.7999623749706, 
1054.5269495114942, 1374.3532950189535, 784.8799963336119, 1067.9595045448507], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709999/1000000 [6:04:04<1:40:47, 47.95it/s]global step 710000, trans_decision ep_re 508.1032358504276

{"global_step": 710000, "eval_re": [840.259425104286, 6.095701388222888, 
685.0667581714071, 335.7510339390469, 1263.520844926272, 151.07554967393688, 
58.202568846110225, 1114.928619879343, 572.4922612994367, 53.63959527621449], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719997/1000000 [6:09:04<1:37:17, 47.97it/s]global step 720000, trans_decision ep_re 317.37298120659324

{"global_step": 720000, "eval_re": [351.6058010113098, 282.05152366969827, 
96.47482048211276, 571.3674468512587, 146.65162912474614, 763.0241011370268, 
278.0614474250647, 490.7456804115271, 132.97400536813404, 60.77335658505467], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729996/1000000 [6:14:04<1:32:52, 48.45it/s]global step 730000, trans_decision ep_re 597.8464265773157

{"global_step": 730000, "eval_re": [509.6940537415485, 865.982249359399, 
889.3783882360674, 221.3033870532392, 168.43226841419036, 1123.3366455634684, 
991.8901447797143, 840.8797020904633, 339.22588806935806, 28.341538465709192], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739995/1000000 [6:19:14<1:29:57, 48.17it/s]global step 740000, trans_decision ep_re 203.8506547241541

{"global_step": 740000, "eval_re": [-23.26475381894979, 382.8088669794983, 
310.95482372218913, 62.47816341004553, 297.4236897032408, 194.08965073992604, 
67.73366258946868, 282.34694321883353, 225.51320353959673, 238.42229715769182], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749999/1000000 [6:24:14<1:25:50, 48.54it/s]global step 750000, trans_decision ep_re 190.353194591516

{"global_step": 750000, "eval_re": [114.09130626613072, 85.47872295141642, 
315.415640565835, 325.5719056710423, 279.1627455072255, 117.92531670316025, 
139.32335046787566, 238.78893679000143, 82.61634217997708, 205.1576788124955], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759995/1000000 [6:29:14<1:23:05, 48.14it/s]global step 760000, trans_decision ep_re 492.6068145182925

{"global_step": 760000, "eval_re": [1127.463549069126, 186.06353699008835, 
301.0274816842848, 216.62295752517, 329.87895408841564, 909.4268988469827, 
470.55227853578197, -3.6333927965995088, 754.6006380596518, 634.0652431800232], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769999/1000000 [6:34:24<1:19:43, 48.08it/s]global step 770000, trans_decision ep_re 144.03551949532158

{"global_step": 770000, "eval_re": [84.50448501951307, 199.08055429267742, 
3.575340803746148, 72.13502528386414, 734.7522122684255, 233.73312330229382, 
6.219636109137536, 6.255938062460878, 26.69986538752439, 73.39901442357292], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779995/1000000 [6:39:24<1:15:26, 48.60it/s]global step 780000, trans_decision ep_re 1240.4766259848313

{"global_step": 780000, "eval_re": [1216.9755285325266, 796.1834478456072, 
1869.0789049646964, 1082.6960593361084, 2077.88701629602, 1730.710699202069, 
857.6922347886311, 45.640108900366734, 1968.527067591107, 759.3751923911798], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789997/1000000 [6:44:24<1:12:16, 48.42it/s]global step 790000, trans_decision ep_re 330.31107537389346

{"global_step": 790000, "eval_re": [148.36285341423059, 528.258867136765, 
344.91226583741803, 149.4363772239591, 1131.8598347540958, -16.773817340533466, 
184.9654873607712, -20.503416697225248, 862.7172386299565, -10.124936580502773],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799995/1000000 [6:49:34<1:10:26, 47.32it/s]global step 800000, trans_decision ep_re 577.0482616455372

{"global_step": 800000, "eval_re": [456.18362907438313, 133.6897798475266, 
987.739209372758, 211.659823798621, 673.6027546475442, 829.1010112595419, 
333.1897364603018, 287.5064590072264, 61.93312464940085, 1795.877088338068], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809999/1000000 [6:54:44<1:09:31, 45.55it/s]global step 810000, trans_decision ep_re -34.80492550699071

{"global_step": 810000, "eval_re": [-34.11640528042298, -32.46045874298432, 
-33.918696519565614, -32.9003462995219, -36.59287584338356, -30.828868802050394,
-35.894622515846045, -37.002908619394745, -40.10983934812146, 
-34.22423309861612], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

 82%|████████▏ | 819996/1000000 [6:59:54<1:02:28, 48.03it/s]global step 820000, trans_decision ep_re 592.0888855645172

{"global_step": 820000, "eval_re": [1766.821642544228, 790.1401792953756, 
695.4564891506717, 459.5894622747891, 38.89172243683718, 1043.3299653211925, 
119.17164846290383, 520.7384702692681, 198.5481882906664, 288.2010875992398], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829995/1000000 [7:05:04<59:53, 47.31it/s]global step 830000, trans_decision ep_re 818.7807935221274

{"global_step": 830000, "eval_re": [995.1595187929051, 1093.8761187208697, 
1085.3503893333832, 1155.867844909621, 739.7494130664362, 723.9545028760617, 
733.3648963378772, 269.5946111193809, 1044.4834040526507, 346.4072360120884], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839995/1000000 [7:10:14<58:54, 45.27it/s]global step 840000, trans_decision ep_re 1140.806278427267

{"global_step": 840000, "eval_re": [1003.4470297568727, 1150.9620554934409, 
1576.7398279129704, 1572.9820747858976, 1714.1059543630222, 175.70857192419055, 
1267.8596781241188, 597.3121756561126, 885.3258024149512, 1463.6196138410944], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849997/1000000 [7:15:24<53:44, 46.51it/s]global step 850000, trans_decision ep_re 949.1933281271893

{"global_step": 850000, "eval_re": [728.6066770105074, 518.4387332315827, 
1526.0960333925584, 1376.5201515839253, 1227.2312067013904, 1360.0499709713881, 
518.5915779536098, 437.67743208123136, 1166.6593420480972, 632.0621562976037], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859996/1000000 [7:20:34<48:45, 47.86it/s]global step 860000, trans_decision ep_re 1090.7547714028074

{"global_step": 860000, "eval_re": [1108.8408795088433, 1302.4107938899085, 
1226.2355151619456, 1883.1125267597758, 1594.1214166319933, 536.2232421896188, 
1348.4843762092655, 325.38838768178096, 544.9296068990258, 1037.8009690959161], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869995/1000000 [7:25:44<46:06, 47.00it/s]global step 870000, trans_decision ep_re 518.6812351134596

{"global_step": 870000, "eval_re": [448.93735657308247, 327.5251144758769, 
310.27319316481316, 74.56825194034163, 676.5933926850075, 215.16363276541992, 
1152.6013378278758, 968.2873136149735, 936.5213325709028, 76.34142551630188], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879999/1000000 [7:30:54<42:23, 47.19it/s]global step 880000, trans_decision ep_re 1034.5966966338842

{"global_step": 880000, "eval_re": [888.108878967323, 872.1581062440507, 
1473.9330878103526, 841.0615873478007, 1443.645536303439, 1309.341534020688, 
1195.5798742843465, 182.013090618089, 1206.3180396123155, 933.8072311304378], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889998/1000000 [7:36:04<38:25, 47.71it/s]global step 890000, trans_decision ep_re 713.8844306108701

{"global_step": 890000, "eval_re": [678.6552098667423, 1302.7457437636124, 
508.1531541806522, 777.8139814268562, 412.4217209846341, 1010.1303044120251, 
60.69160636996882, 753.7078857112374, 619.7388064216358, 1014.7858929713368], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899997/1000000 [7:41:04<35:00, 47.62it/s]global step 900000, trans_decision ep_re 1223.9066471608808

{"global_step": 900000, "eval_re": [835.1323261491724, 926.0237001333844, 
1199.5239778165944, 1361.5207731034266, 1706.4518614620924, 1260.8163933422525, 
578.4716025640448, 1665.0178168611549, 1654.5764058448342, 1051.531614331852], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909998/1000000 [7:46:14<41:24, 36.23it/s]global step 910000, trans_decision ep_re 809.0359273088663

{"global_step": 910000, "eval_re": [460.3746287077871, 1241.425408696552, 
715.8198020494071, 1218.4626759269797, 164.77071811871315, 1166.4711941928551, 
683.584258472872, 555.9267547140494, 723.6824378661997, 1159.8413943432474], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919997/1000000 [7:51:24<28:58, 46.02it/s]global step 920000, trans_decision ep_re 758.5861939297188

{"global_step": 920000, "eval_re": [1085.581142320984, -231.7688789298518, 
1265.573764987367, 24.66700766936045, 884.121238485597, 1543.3951959839733, 
1586.393654022708, 233.08250583139534, 722.6604840541402, 472.15582487151363], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929995/1000000 [7:56:34<24:37, 47.37it/s]global step 930000, trans_decision ep_re 947.9070998030277

{"global_step": 930000, "eval_re": [783.6891429243701, 865.8946589355166, 
1309.1068863948249, 837.2557217899588, 1636.7029236846379, 1103.6502992506933, 
308.1919817763749, 1176.5889905768524, 916.8874100115028, 541.102982685545], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939995/1000000 [8:01:44<21:25, 46.67it/s]global step 940000, trans_decision ep_re 127.26433277999676

{"global_step": 940000, "eval_re": [-20.434184983616188, 90.1746146902051, 
117.89177687968098, 160.1921482636054, 68.85889661564013, 81.1061798159694, 
168.46220473593945, 137.8172019825037, 96.6611613284934, 371.9133284715463], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949997/1000000 [8:06:54<17:29, 47.64it/s]global step 950000, trans_decision ep_re 736.1806485727151

{"global_step": 950000, "eval_re": [229.10715221229825, 1083.3406002066815, 
902.8186171695714, 961.2413347438054, 191.46911451872742, 223.05303832779458, 
582.6006714064663, 1181.4329786516048, 1258.9130377843628, 747.8299407058389], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959999/1000000 [8:12:04<14:27, 46.09it/s]global step 960000, trans_decision ep_re 764.1398341629572

{"global_step": 960000, "eval_re": [1004.3113124498981, 1107.7070999094142, 
709.03837828304, 418.53791556802275, 754.0640697831582, 494.71159332860054, 
730.8165705127512, 1344.472855413513, 904.0915708256658, 173.64697555550939], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969995/1000000 [8:17:14<10:45, 46.52it/s]global step 970000, trans_decision ep_re 665.9518080165944

{"global_step": 970000, "eval_re": [425.9489959286193, 1319.2179272988697, 
278.88219870954094, 186.34680903527052, 1424.0065449449341, 730.0492121448636, 
273.48951285089237, 1370.8837503076913, 136.55296129790105, 514.1401676473611], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979999/1000000 [8:22:24<07:17, 45.67it/s]global step 980000, trans_decision ep_re 902.6014069824629

{"global_step": 980000, "eval_re": [1052.6201419621118, 351.24253856022983, 
596.3073935563064, 943.100470603942, 1204.7577855623829, 1210.0098193541576, 
1008.0659751191538, 928.0428239370515, 1235.990128642409, 495.87699252688486], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989995/1000000 [8:27:34<03:31, 47.38it/s]global step 990000, trans_decision ep_re 206.29208021430927

{"global_step": 990000, "eval_re": [85.78320277403361, 160.8693559494401, 
195.52623606439911, 224.93114592740417, 224.88679314455786, 438.4374796912819, 
515.3083343291969, 47.49109913824423, 167.16847200287762, 2.518683121657591], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999997/1000000 [8:32:44<00:00, 47.25it/s]global step 1000000, trans_decision ep_re 470.5269860857328

{"global_step": 1000000, "eval_re": [907.5664563037756, 455.0205593757997, 
56.44977381903838, 983.1174261611245, 262.6941796318899, 446.03858492442197, 
526.6892645098725, 258.458963394588, 195.46010208141567, 613.774550655402], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [8:33:03<00:00, 32.48it/s]
