
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.15
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9997/1000000 [05:30<12:54:26, 21.31it/s]global step 10000, trans_decision ep_re -136.38857131462058

{"global_step": 10000, "eval_re": [-126.80507365075403, -109.52531985958976, 
-110.96946184790492, -107.68170043164959, -19.56824502549247, 
-143.7334991599616, -122.98752630746817, -69.67799362329326, 
-450.26198259439434, -102.6749106456976], "eval_len": [1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19999/1000000 [16:30<12:45:08, 21.35it/s]global step 20000, trans_decision ep_re -97.10285590799255

{"global_step": 20000, "eval_re": [-47.27096494321127, -93.49427031544154, 
0.1655852882912019, 8.708859198467875, -113.39912210680615, -157.694288307629, 
-143.56466644144461, -250.58395115451654, -75.66026542109908, 
-98.23547487653629], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

  3%|▎         | 29999/1000000 [27:40<12:37:29, 21.34it/s]global step 30000, trans_decision ep_re -17.982569104937813

{"global_step": 30000, "eval_re": [-86.27784137197456, -68.05169392383718, 
-17.5585329681268, 166.66888196012894, -93.06236229309448, 61.29831372080864, 
-104.12805290849528, -68.82740560891536, -65.48536176469443, 95.5983641088224], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39998/1000000 [38:50<12:34:19, 21.21it/s]global step 40000, trans_decision ep_re 686.1077006848417

{"global_step": 40000, "eval_re": [815.440754573224, 528.7747963853406, 
737.2585087323364, 688.7049846696872, 805.3426838802993, 635.6854282397703, 
698.3710379332255, 696.2259806587083, 671.3311587943468, 583.9416729814781], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49999/1000000 [50:00<12:30:25, 21.10it/s]global step 50000, trans_decision ep_re 769.9016666466807

{"global_step": 50000, "eval_re": [595.6461636979428, 791.659280746829, 
848.2987816073073, 888.0398134577691, 836.0484628705186, 856.154675242472, 
829.9453023965403, 468.5482081706314, 805.6074077719936, 779.0685705048038], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59998/1000000 [1:01:00<12:11:53, 21.41it/s]global step 60000, trans_decision ep_re 882.2322767920474

{"global_step": 60000, "eval_re": [911.145744026089, 798.243218826014, 
808.5282838307286, 831.7816033458267, 915.7631619060179, 1049.5423076462082, 
742.657695097056, 871.4116128234932, 976.0672616983219, 917.1818787207178], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69999/1000000 [1:12:10<12:12:48, 21.15it/s]global step 70000, trans_decision ep_re 936.3217228229576

{"global_step": 70000, "eval_re": [919.6773515308487, 959.4865392108544, 
968.6741874022246, 979.2603374680217, 1002.0820321193391, 925.491591469709, 
870.617394326687, 1075.7464614934179, 842.6680821964876, 819.5132510119853], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79999/1000000 [1:23:20<12:05:57, 21.12it/s]global step 80000, trans_decision ep_re 920.0799354034003

{"global_step": 80000, "eval_re": [870.6580093466232, 880.3499810325077, 
778.1710533502215, 944.0636330916261, 933.0820992493847, 856.7339602256255, 
939.5989899454298, 832.0858565603187, 1246.985550923416, 919.0702203088504], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89999/1000000 [1:34:20<11:52:04, 21.30it/s]global step 90000, trans_decision ep_re 948.178066652066

{"global_step": 90000, "eval_re": [859.0758827838769, 973.7756649604089, 
865.2414177179993, 1023.8998463841494, 905.729007759466, 1165.4832717158747, 
938.2383691151074, 906.2063424126355, 882.0295541247822, 962.1013095463596], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99999/1000000 [1:45:30<11:52:56, 21.04it/s]global step 100000, trans_decision ep_re 914.2675833419219

{"global_step": 100000, "eval_re": [896.1796327060663, 1196.7482359158907, 
952.2867506413064, 1006.6324724778175, 818.9149765998753, 1006.0798610195421, 
900.8059732464272, 918.9297948123586, 773.2699365439793, 672.8281994559555], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109998/1000000 [1:56:30<11:31:42, 21.44it/s]global step 110000, trans_decision ep_re 982.2533452338979

{"global_step": 110000, "eval_re": [950.1118014825223, 1031.9451097145916, 
880.7479943024698, 1160.066335276976, 965.0387940214431, 792.1622211972102, 
946.838296431732, 1076.7945205688811, 1079.4222333181046, 939.4061460250477], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119997/1000000 [2:07:30<11:24:07, 21.44it/s]global step 120000, trans_decision ep_re 1139.2461895588804

{"global_step": 120000, "eval_re": [1391.2430513048337, 1154.2350147771276, 
1312.5369992785043, 1381.6419435719185, 1141.193734099947, 1077.7368099480134, 
899.5574811928487, 1127.8000412362937, 965.4482102415228, 941.0686099377937], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129999/1000000 [2:18:30<11:22:32, 21.24it/s]global step 130000, trans_decision ep_re 1157.9993645380341

{"global_step": 130000, "eval_re": [857.6760435186443, 1525.4405368135544, 
865.2684886966638, 1195.8961577340845, 1205.043964253815, 987.6220022590509, 
1428.6869294635653, 1184.9527555968855, 1028.4607670155735, 1300.9460000285032],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139999/1000000 [2:29:40<11:13:32, 21.28it/s]global step 140000, trans_decision ep_re 1088.33131107903

{"global_step": 140000, "eval_re": [1050.7946031574895, 947.6252792781697, 
897.5850922514642, 963.5532962199713, 1214.2977702986648, 889.71979625469, 
1587.5873306045178, 1155.730771827421, 828.4004347364219, 1348.0187361614871], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149999/1000000 [2:40:40<11:02:24, 21.39it/s]global step 150000, trans_decision ep_re 1155.6520141374515

{"global_step": 150000, "eval_re": [1195.546668210133, 901.3154947700884, 
911.1165796667583, 1153.7676446073192, 1904.4045807784803, 1164.7984759346002, 
1358.5351536110197, 1033.502450235792, 987.0716692395508, 946.4614243207731], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159999/1000000 [2:51:40<10:54:44, 21.38it/s]global step 160000, trans_decision ep_re 1351.7403481834206

{"global_step": 160000, "eval_re": [1234.803620251949, 902.9230938309729, 
1223.5340532542837, 1605.0593495655046, 1968.4265338963855, 1235.4288487200615, 
1583.7640491832212, 901.9437182973784, 1811.5431463607883, 1049.97706847366], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169998/1000000 [3:02:40<10:44:32, 21.46it/s]global step 170000, trans_decision ep_re 1135.0981972302156

{"global_step": 170000, "eval_re": [822.210759968583, 804.0607894766972, 
1894.8182424095053, 839.228811223138, 1420.2931652824889, 929.3733127106501, 
1384.917021856056, 924.9665042528935, 1111.6665567153489, 1219.446808406797], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179998/1000000 [3:13:40<10:32:03, 21.62it/s]global step 180000, trans_decision ep_re 1162.808638872429

{"global_step": 180000, "eval_re": [999.2696189724954, 1482.7153598811258, 
1333.8598878602527, 843.7062498593369, 1618.7895067051777, 934.7386556271636, 
1066.6253298527047, 980.3656592133308, 1082.5897437855037, 1285.4263769671961], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189998/1000000 [3:24:40<10:22:00, 21.70it/s]global step 190000, trans_decision ep_re 1510.9342262387543

{"global_step": 190000, "eval_re": [1743.7126766718611, 994.2450580731243, 
2154.6221587376235, 1249.0149229582287, 1314.29123769348, 2264.7427052228027, 
1825.9940446747714, 1017.3651212727206, 1397.2849677094237, 1148.0693693735063],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199999/1000000 [3:35:40<10:15:40, 21.66it/s]global step 200000, trans_decision ep_re 1317.2340807936084

{"global_step": 200000, "eval_re": [987.7832032194772, 2026.1761213131392, 
1028.9958137408078, 946.1535564262011, 1802.0775047213324, 942.5160326350016, 
1200.4616819401135, 1636.4958094972462, 1030.4626389882355, 1571.218445454528], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209997/1000000 [3:46:30<10:05:37, 21.74it/s]global step 210000, trans_decision ep_re 1132.716100661234

{"global_step": 210000, "eval_re": [857.3917081022163, 953.8065369274848, 
1037.1419881354814, 1185.7144128408465, 1445.2982539210443, 1173.6718258996473, 
1813.2011302500678, 1244.4043715459622, 864.7848445668528, 751.7459344227357], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219999/1000000 [3:57:20<10:04:32, 21.50it/s]global step 220000, trans_decision ep_re 1408.005644217595

{"global_step": 220000, "eval_re": [1566.6881212766607, 1118.539532790954, 
1599.7359875127026, 1016.1294937030827, 1440.124705095931, 1466.8893941944962, 
1424.5503360917928, 982.0382588390494, 2032.1220730207872, 1433.238539650493], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229998/1000000 [4:08:20<9:47:51, 21.83it/s]global step 230000, trans_decision ep_re 1262.4139544086552

{"global_step": 230000, "eval_re": [872.0012236094458, 1245.7090518094603, 
1081.690028092225, 954.8939850896368, 1797.8642307359362, 1503.269158137549, 
1049.964758646483, 1328.748876290201, 1377.3796302975054, 1412.6186013781073], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239999/1000000 [4:19:10<9:43:56, 21.69it/s]global step 240000, trans_decision ep_re 1310.0614566553218

{"global_step": 240000, "eval_re": [1723.3616888226227, 1109.1491267504368, 
1423.6651966209922, 1137.6382899617302, 1562.4019792592414, 1150.2589317829745, 
1814.3752095721363, 911.902331803063, 1308.6364310562674, 959.2253809237518], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249999/1000000 [4:30:10<9:33:14, 21.81it/s]global step 250000, trans_decision ep_re 1419.339465383256

{"global_step": 250000, "eval_re": [1189.2880324326802, 1539.957180912426, 
1262.0748843558367, 1310.8579365677044, 1188.106262143363, 1407.32894809717, 
1132.6717685097838, 1752.489288178392, 1873.3192566218402, 1537.3010960133645], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259999/1000000 [4:41:00<9:25:43, 21.80it/s]global step 260000, trans_decision ep_re 1238.9034839900207

{"global_step": 260000, "eval_re": [1325.0429564348146, 1002.3987991177205, 
1428.3475952573024, 878.819496690764, 1465.3241362641168, 1034.2585544016326, 
1909.8425945445606, 957.2547451998469, 1001.622411773133, 1386.1235502163136], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269999/1000000 [4:51:50<9:18:22, 21.79it/s]global step 270000, trans_decision ep_re 1334.3389722577053

{"global_step": 270000, "eval_re": [1723.85256063304, 912.0826284328283, 
1626.9905784226942, 1344.5583043242, 1000.6787109095303, 1699.7261846492802, 
1080.3046110948694, 1845.7742805691148, 1176.8777153789986, 932.5441481624957], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279998/1000000 [5:02:40<9:05:51, 21.98it/s]global step 280000, trans_decision ep_re 1117.462121327937

{"global_step": 280000, "eval_re": [1136.3447432306327, 936.9263373762926, 
1115.0133746401937, 1109.796714890444, 1157.1298282354587, 850.7601308703194, 
1354.6884566527856, 1599.4798330142842, 959.6552584420036, 954.8265359269556], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289998/1000000 [5:13:30<8:58:28, 21.98it/s]global step 290000, trans_decision ep_re 1258.2954947091791

{"global_step": 290000, "eval_re": [988.3742307078741, 1418.0277259453537, 
1349.220265315872, 1405.039011774057, 1012.3932300453807, 1931.1345286686412, 
1451.7296967407442, 986.0758641911186, 1004.7680034114538, 1036.1923902912952], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299998/1000000 [5:24:20<8:50:35, 21.99it/s]global step 300000, trans_decision ep_re 1363.727390328664

{"global_step": 300000, "eval_re": [1351.0150928333512, 1020.4004558700276, 
1093.5096440713041, 1241.6266651715525, 1071.5821540468892, 2001.6764791862763, 
1803.9919367026469, 1796.0664975513853, 1296.8374216540653, 960.5675561991403], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309997/1000000 [5:35:10<8:45:32, 21.88it/s]global step 310000, trans_decision ep_re 1409.6102235280646

{"global_step": 310000, "eval_re": [1738.3414989664357, 1370.4502458514635, 
1819.0951688360558, 1720.261189668032, 1803.2813142843622, 995.3038428709151, 
932.4827100813399, 1505.3616726212065, 1037.8586483442368, 1173.6659437565968], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319997/1000000 [5:46:00<8:39:36, 21.81it/s]global step 320000, trans_decision ep_re 1285.2392619797556

{"global_step": 320000, "eval_re": [1317.8012804456048, 1342.3748404114508, 
1674.6913818676096, 1020.1170394456506, 1483.2037563910387, 988.0339157978581, 
1519.3338284315062, 1122.5036951965544, 1138.2043681190903, 1246.1285136911924],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329997/1000000 [5:56:50<8:35:10, 21.68it/s]global step 330000, trans_decision ep_re 1296.6493696067018

{"global_step": 330000, "eval_re": [1371.421659429933, 958.0304139554013, 
1036.58060881905, 1361.8461551870053, 1399.4052860778204, 1943.72844184857, 
1025.4038418145753, 1727.935335290851, 1059.6303622836062, 1082.511591360207], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339999/1000000 [6:07:40<8:23:48, 21.83it/s]global step 340000, trans_decision ep_re 1220.076973888228

{"global_step": 340000, "eval_re": [939.2170989329302, 1146.6208861731272, 
992.6794622830927, 1026.7202287756315, 921.7029867102608, 899.5455078168509, 
2173.664985660313, 1747.9880277509449, 940.0662859316357, 1412.5642688474923], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349999/1000000 [6:18:30<8:17:53, 21.76it/s]global step 350000, trans_decision ep_re 1168.6383985076009

{"global_step": 350000, "eval_re": [1747.5591070215444, 1057.8237276227762, 
969.0124375148208, 1206.2723604341743, 912.1644954630129, 898.3821765014674, 
998.4312667355799, 1264.7798329193997, 1094.3750567634204, 1537.5835240998122], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359999/1000000 [6:29:20<8:09:52, 21.77it/s]global step 360000, trans_decision ep_re 1003.5895006015578

{"global_step": 360000, "eval_re": [1330.6440844780507, 833.096213811838, 
906.5192623043451, 1387.6570324010047, 891.2448024813309, 940.6580063408409, 
800.9411975180187, 906.3227070691967, 1026.2639667695805, 1012.5477328413724], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369999/1000000 [6:40:10<8:02:16, 21.77it/s]global step 370000, trans_decision ep_re 1319.0420370838608

{"global_step": 370000, "eval_re": [1062.096135615331, 1716.0829628798735, 
1645.5493072410347, 942.3917770742808, 1179.077708659374, 858.089753437024, 
984.7018382950102, 1346.2266047667558, 1704.8193252620276, 1751.3849576078956], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379999/1000000 [6:51:00<7:54:15, 21.79it/s]global step 380000, trans_decision ep_re 1202.087716597732

{"global_step": 380000, "eval_re": [1876.7387457710383, 1118.9963027830836, 
1433.4526727438451, 1143.6511337610702, 1100.7134237638986, 804.4506286742997, 
951.0708666071199, 1439.9500862475827, 1206.6649762745496, 945.1883293508319], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389999/1000000 [7:01:50<7:48:17, 21.71it/s]global step 390000, trans_decision ep_re 1260.9667397291246

{"global_step": 390000, "eval_re": [1117.0051208281095, 1438.4376644762058, 
1086.9130747137194, 1001.1626258123587, 1702.7584350866045, 995.7813088395436, 
956.2311108140248, 2227.977764904882, 1138.546743079518, 944.8535487362785], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [7:12:40<7:38:03, 21.83it/s]global step 400000, trans_decision ep_re 1248.5830339839888

{"global_step": 400000, "eval_re": [2097.6135193975488, 1741.224704871747, 
955.1368316164509, 1592.3899660978761, 942.3071424022773, 703.5424299285805, 
1177.498364152248, 902.480311045774, 1495.3702730005725, 878.2667973268121], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409999/1000000 [7:23:30<7:33:16, 21.69it/s]global step 410000, trans_decision ep_re 1146.4035396925942

{"global_step": 410000, "eval_re": [1191.6820193299582, 1520.9813837801544, 
959.5358443946852, 960.7653545347932, 1180.6973873734887, 1053.644589828565, 
1184.1400581943167, 1074.5921487750613, 1129.3552913374454, 1208.6413193774729],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419999/1000000 [7:34:20<7:25:11, 21.71it/s]global step 420000, trans_decision ep_re 1214.8634128592143

{"global_step": 420000, "eval_re": [1026.97677400252, 1049.6878639512452, 
989.4901944315573, 1508.6952090103675, 1253.588646353557, 1644.4922693610208, 
1228.1374476895162, 1414.2936113936162, 915.4021708365659, 1117.8699415621786], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429999/1000000 [7:45:20<7:24:19, 21.38it/s]global step 430000, trans_decision ep_re 1304.299575978411

{"global_step": 430000, "eval_re": [1135.3057157408095, 2004.3491116840785, 
1223.1605090689727, 1638.1198168924768, 843.2488646269363, 979.3272192852835, 
992.5113874271017, 1245.6332594533199, 1249.89347116644, 1731.4464044386903], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439997/1000000 [7:56:10<7:08:36, 21.78it/s]global step 440000, trans_decision ep_re 964.2937716863778

{"global_step": 440000, "eval_re": [880.0971924905767, 850.3971940783078, 
928.7232159864649, 929.0807207017881, 887.3441515748639, 937.7064397605986, 
931.6643453037935, 1517.2930167664128, 890.8526329632755, 889.7788072376975], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449997/1000000 [8:07:00<7:01:57, 21.72it/s]global step 450000, trans_decision ep_re 1166.36644771398

{"global_step": 450000, "eval_re": [1055.2077027017715, 1048.2230390809862, 
1232.6517304556871, 898.9061300809635, 1185.1886910977962, 1107.667685867287, 
1468.7707575687123, 1127.5089959406434, 811.0018571404296, 1728.5378872055244], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459999/1000000 [8:17:50<7:00:48, 21.39it/s]global step 460000, trans_decision ep_re 1123.6338749049057

{"global_step": 460000, "eval_re": [1108.5341423164982, 948.2527828090331, 
1081.8113758628863, 1024.5946858773518, 1022.0492918470026, 921.962144897419, 
1474.3012719580288, 1057.7735980545847, 1009.1640109684886, 1587.895444457762], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469999/1000000 [8:28:50<6:44:07, 21.86it/s]global step 470000, trans_decision ep_re 1231.859262270359

{"global_step": 470000, "eval_re": [1595.8413560309014, 946.8076246843856, 
1484.8704030605193, 1638.700298650885, 1062.2971208840938, 1294.9740409312803, 
1303.0266778525022, 962.1990029557838, 937.9346866694113, 1091.941410983827], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479999/1000000 [8:39:40<6:43:12, 21.49it/s]global step 480000, trans_decision ep_re 1353.1693824775182

{"global_step": 480000, "eval_re": [1358.1574919566006, 1267.0014060907426, 
1775.7962558457589, 1467.7033101352815, 1644.8994537517833, 1105.9355000100763, 
1067.153492344528, 1123.2182033862646, 1182.4325318152653, 1539.396179438881], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489999/1000000 [8:50:30<6:27:51, 21.91it/s]global step 490000, trans_decision ep_re 1073.1522892891887

{"global_step": 490000, "eval_re": [886.2928219401477, 880.0073363190558, 
944.0521835723976, 1349.0415867015897, 875.4927017304797, 780.9811471743437, 
1266.676309969666, 1078.7117873362633, 1150.5698544646564, 1519.6971636832877], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499999/1000000 [9:01:20<6:22:52, 21.76it/s]global step 500000, trans_decision ep_re 1345.804318601254

{"global_step": 500000, "eval_re": [1699.1810313388053, 769.4971854725137, 
929.4716645106538, 1497.0562686078345, 2099.3803606618562, 1285.892715721677, 
1325.872868837835, 967.3884916588202, 1785.3213200579903, 1098.9812791445538], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509999/1000000 [9:12:20<6:19:54, 21.50it/s]global step 510000, trans_decision ep_re 1290.7385597058606

{"global_step": 510000, "eval_re": [1428.379880098268, 1918.340167521927, 
1179.2745212249142, 1327.0722885875498, 1329.176932278375, 1473.1208370790332, 
1156.5675003040099, 1268.8724410033826, 964.3491468231277, 862.2318821380185], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519999/1000000 [9:23:10<6:09:48, 21.63it/s]global step 520000, trans_decision ep_re 1321.4373709321098

{"global_step": 520000, "eval_re": [1123.6064651144745, 941.535142700676, 
1753.048399780759, 1014.3990696822225, 1159.7527744565296, 1860.9681199300837, 
1597.4762683735014, 979.3844825181357, 1210.1124897078307, 1574.0904970568854], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529998/1000000 [9:34:10<6:03:11, 21.57it/s]global step 530000, trans_decision ep_re 1231.586130582585

{"global_step": 530000, "eval_re": [901.1458243406743, 1000.6144264885925, 
1219.7283025086397, 1419.1471143537635, 1003.2284914571003, 994.5210404187767, 
1391.2701216672722, 1528.4141975787454, 1094.7222432713033, 1763.0695437409809],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539999/1000000 [9:45:00<5:51:47, 21.79it/s]global step 540000, trans_decision ep_re 1440.8431631776557

{"global_step": 540000, "eval_re": [1837.8600117945948, 1450.2714752222441, 
1037.6891936367344, 1793.4422715163842, 1570.4154703318068, 1463.452132130183, 
1647.039314656319, 1118.2121107388693, 1495.3198080241293, 994.7298437252924], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549999/1000000 [9:55:50<5:44:23, 21.78it/s]global step 550000, trans_decision ep_re 1386.4232113789094

{"global_step": 550000, "eval_re": [1405.0186415574246, 1017.8705987045829, 
1915.3198121726505, 1067.7590709380854, 1973.7487017279536, 1005.7608021697155, 
1596.2432408207248, 971.8833739376779, 1143.2995099945524, 1767.3283617657273], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559999/1000000 [10:06:40<5:37:34, 21.72it/s]global step 560000, trans_decision ep_re 1198.3104312211883

{"global_step": 560000, "eval_re": [947.4176403503867, 1287.6621071796003, 
950.7724484190777, 1068.3705054806728, 1066.1601693746281, 1359.4808790725372, 
1039.5889025002778, 1357.6869966253203, 1267.7953440125316, 1638.1693191968513],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569999/1000000 [10:17:30<5:29:43, 21.73it/s]global step 570000, trans_decision ep_re 1353.989664929016

{"global_step": 570000, "eval_re": [1071.2640257547364, 1512.555099192533, 
2037.160984377285, 951.3829479719345, 1703.1214868212135, 1368.7597024896259, 
1328.3429522488061, 1069.7836325808012, 867.0974900424859, 1630.4283278107387], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579999/1000000 [10:28:30<5:21:22, 21.78it/s]global step 580000, trans_decision ep_re 1278.3679677968796

{"global_step": 580000, "eval_re": [1431.2184297025813, 1361.062925400942, 
1753.1131727098332, 1229.1938423886497, 1043.4092726606639, 1210.3509279549303, 
1111.7193951779002, 1378.6581899836651, 1350.449694717122, 914.5038272725085], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589999/1000000 [10:39:20<5:14:20, 21.74it/s]global step 590000, trans_decision ep_re 1212.781834549627

{"global_step": 590000, "eval_re": [1240.0532019289647, 1024.9879497160784, 
994.9779964504556, 1012.2596498579925, 1458.439272505139, 1452.7286118410834, 
984.9486989382444, 1236.085549103089, 1088.9419337179015, 1634.3954814373215], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599999/1000000 [10:50:10<5:04:48, 21.87it/s]global step 600000, trans_decision ep_re 1296.0606223768787

{"global_step": 600000, "eval_re": [1034.1477634143828, 1182.5792759537744, 
1649.631408263592, 1376.168015899884, 1549.9091567717953, 1408.5029242742514, 
1430.210038101699, 1055.5775481301393, 803.3091966464281, 1470.5708963128411], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609998/1000000 [11:01:00<4:55:32, 21.99it/s]global step 610000, trans_decision ep_re 1462.8168950341526

{"global_step": 610000, "eval_re": [1244.0329452982126, 906.8235787411231, 
2157.403673724188, 1316.8946976156265, 897.4400248603786, 1610.4753483659356, 
1047.6501790919378, 1903.1255897621747, 1987.6635473230292, 1556.6593655589206],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619998/1000000 [11:11:50<4:48:35, 21.95it/s]global step 620000, trans_decision ep_re 1235.385887536449

{"global_step": 620000, "eval_re": [986.4775585550209, 1257.044277507951, 
922.0245272963033, 1153.499710285802, 1299.1147152619446, 968.7539163711843, 
1884.2227843073313, 1405.2566283399149, 1091.8649862153654, 1385.5997712236729],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629999/1000000 [11:22:40<4:42:37, 21.82it/s]global step 630000, trans_decision ep_re 1234.079967362275

{"global_step": 630000, "eval_re": [828.5049321331728, 906.5960743965799, 
1218.2021906113762, 1237.800912361472, 1464.816299564161, 1098.238752770357, 
1850.665708378586, 1276.7040674934674, 1791.5946243951043, 667.6761115184736], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639999/1000000 [11:33:30<4:35:07, 21.81it/s]global step 640000, trans_decision ep_re 1323.1023955682854

{"global_step": 640000, "eval_re": [1317.1338732088589, 1604.6411340785542, 
837.7793190302347, 1002.9677186142158, 1789.4330931552618, 933.1374764159202, 
1035.1684549934073, 1479.2406358817245, 1609.2616620855733, 1622.260588219103], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649999/1000000 [11:44:20<4:28:43, 21.71it/s]global step 650000, trans_decision ep_re 1424.9615000058973

{"global_step": 650000, "eval_re": [910.4781778266314, 943.3290778866784, 
1808.328108224195, 1610.7364584061943, 1396.3122937480582, 2040.0780933610038, 
1821.776435205977, 1489.8092512180983, 1321.2855698951807, 907.4815342869571], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659999/1000000 [11:55:10<4:19:46, 21.81it/s]global step 660000, trans_decision ep_re 1060.5437343503074

{"global_step": 660000, "eval_re": [718.4359544062934, 1414.5483246972346, 
794.9044432386065, 910.6684661975181, 948.3003352108211, 1306.6814805812448, 
1091.6099857903516, 913.5803729454733, 860.7357968664486, 1645.9721835690823], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669999/1000000 [12:06:00<4:12:36, 21.77it/s]global step 670000, trans_decision ep_re 1102.5745679064592

{"global_step": 670000, "eval_re": [1350.651041975897, 1013.7450923763085, 
1275.9773597649014, 1090.9258764682422, 901.2490641024601, 1074.8561185434105, 
990.3540536612416, 1022.7805645122411, 1135.8695236384463, 1169.3369840214411], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679999/1000000 [12:16:50<4:03:35, 21.90it/s]global step 680000, trans_decision ep_re 1111.6935376506788

{"global_step": 680000, "eval_re": [929.7570671508611, 1263.1448535655245, 
855.1015067957816, 922.6501808353064, 1023.985742507147, 1102.7605440188343, 
1432.1305405541832, 1120.3521487264388, 1321.0416992783178, 1146.0110930743936],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689999/1000000 [12:27:50<3:58:46, 21.64it/s]global step 690000, trans_decision ep_re 1286.4444439583453

{"global_step": 690000, "eval_re": [1422.6135773826518, 1027.441116936706, 
1001.9250574657983, 1063.4764150098802, 1379.6689839891856, 1323.865039010898, 
1498.6600891843875, 1627.2667505744155, 1596.2620265083, 923.265383521229], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699998/1000000 [12:38:40<3:47:56, 21.94it/s]global step 700000, trans_decision ep_re 1302.1536579328658

{"global_step": 700000, "eval_re": [959.9976562046348, 866.696950184053, 
1416.580357772947, 1704.235283787945, 1371.992414842372, 1351.3069313301369, 
1161.8769627223621, 1001.2814000643334, 1639.0321908959477, 1548.5364315239233],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709997/1000000 [12:49:30<3:41:37, 21.81it/s]global step 710000, trans_decision ep_re 1246.2083889637763

{"global_step": 710000, "eval_re": [1113.9565143163188, 902.5309297957191, 
1663.174513440172, 1202.767898546731, 1737.6172692025957, 869.0073480033379, 
1228.2069996992975, 886.4740984362368, 1537.647402495628, 1320.7009157017249], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719999/1000000 [13:00:20<3:34:44, 21.73it/s]global step 720000, trans_decision ep_re 1037.7057529839544

{"global_step": 720000, "eval_re": [1129.3209656579033, 806.5805063566556, 
1353.1723395214194, 1511.8489980573213, 1100.4972566723993, 948.7869596055701, 
794.6895711357072, 925.3584831722768, 922.5832853273433, 884.2191643329462], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729999/1000000 [13:11:10<3:27:34, 21.68it/s]global step 730000, trans_decision ep_re 1052.0957133316083

{"global_step": 730000, "eval_re": [971.4911093258149, 1522.9748975408006, 
822.9003916720307, 1144.4187490613542, 1044.9605359331995, 1147.162461383706, 
955.8918618906814, 1031.0288568288006, 988.2925637812626, 891.8357058984341], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739999/1000000 [13:22:00<3:19:32, 21.72it/s]global step 740000, trans_decision ep_re 1043.2787187251365

{"global_step": 740000, "eval_re": [956.8283519913194, 868.0102005843227, 
977.7885799974935, 1126.5603685219269, 936.5810224578668, 1099.7416934904873, 
1408.8092399897591, 1004.3190356924839, 1114.1873205490188, 939.9613739766888], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749999/1000000 [13:33:00<3:11:23, 21.77it/s]global step 750000, trans_decision ep_re 1056.595521478128

{"global_step": 750000, "eval_re": [920.2048948485527, 1790.7198702651574, 
1121.9077496200225, 929.5169594354657, 887.3615275587701, 891.4357586973327, 
892.3267149722509, 1207.836409961918, 879.1359936086941, 1045.509335813114], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759998/1000000 [13:43:50<3:01:18, 22.06it/s]global step 760000, trans_decision ep_re 1157.1734888013948

{"global_step": 760000, "eval_re": [1213.5693192557064, 933.331797632463, 
1395.9988452596172, 1201.8520698094096, 1214.323821227474, 1060.9229757256073, 
862.2184436768741, 957.5223610548306, 1628.7132104284601, 1103.282043943505], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769998/1000000 [13:54:40<2:54:05, 22.02it/s]global step 770000, trans_decision ep_re 1167.7928811594218

{"global_step": 770000, "eval_re": [1769.6612198983032, 1024.2103371296696, 
1686.3669801956653, 863.0998503971699, 1023.6598053461466, 1033.468199877341, 
996.662359044682, 1566.8450989440923, 1022.6216579152644, 691.3333028458846], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779998/1000000 [14:05:30<2:46:38, 22.00it/s]global step 780000, trans_decision ep_re 1325.4217216432448

{"global_step": 780000, "eval_re": [1226.4382807960687, 1170.380092978687, 
1640.1814572716735, 878.0757215816, 998.4024960938582, 930.7614242227822, 
1622.2509710340735, 1909.760058465768, 1711.636298473253, 1166.330415514685], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789997/1000000 [14:16:20<2:40:00, 21.87it/s]global step 790000, trans_decision ep_re 964.682533461813

{"global_step": 790000, "eval_re": [921.0093094427287, 930.4591516905489, 
226.92863795203507, 942.0060730139015, 1047.4268056376675, 1201.3984412788088, 
894.2110856441894, 920.7998655419633, 1056.6334773862668, 1505.95248703002], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799997/1000000 [14:27:10<2:33:21, 21.74it/s]global step 800000, trans_decision ep_re 1014.3673652443134

{"global_step": 800000, "eval_re": [1267.389403810832, 1226.3754188137812, 
1088.2763151330346, 898.5213404085999, 926.1467061455189, 952.3715288918412, 
913.4581043653279, 883.9330381139947, 986.3295183424427, 1000.8722784177608], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809999/1000000 [14:38:00<2:25:32, 21.76it/s]global step 810000, trans_decision ep_re 1052.783284540419

{"global_step": 810000, "eval_re": [902.2686243309329, 874.548462038574, 
968.5922788915843, 884.6400636896977, 998.4920377791973, 950.8731960578974, 
1187.6183473205704, 928.4757502346558, 1003.4035589575824, 1828.920526103496], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819999/1000000 [14:48:51<2:17:46, 21.77it/s]global step 820000, trans_decision ep_re 1059.3304042814113

{"global_step": 820000, "eval_re": [1047.2278687169505, 1054.1260594872826, 
982.4383123251549, 1042.1137916832024, 1073.2245746934268, 871.4413225047754, 
837.8639911213811, 1269.6098668615207, 1190.929235494, 1224.3290199264202], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829999/1000000 [14:59:41<2:10:27, 21.72it/s]global step 830000, trans_decision ep_re 995.5968764768188

{"global_step": 830000, "eval_re": [887.85160794303, 1058.521803473628, 
771.8987082062375, 807.6156742728145, 868.6903582882247, 1242.680631422922, 
828.274582800831, 1632.095267107371, 860.4519930883595, 997.8881381647705], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839999/1000000 [15:10:31<2:02:27, 21.78it/s]global step 840000, trans_decision ep_re 1059.1402530916885

{"global_step": 840000, "eval_re": [993.0654720798167, 1060.3009946105951, 
999.398634972457, 1188.9278468413117, 1049.8807484399756, 911.4444893274239, 
902.9299175616582, 960.4415816974486, 995.2588535920878, 1529.7539917941099], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849999/1000000 [15:21:31<1:54:42, 21.80it/s]global step 850000, trans_decision ep_re 1195.7072321749347

{"global_step": 850000, "eval_re": [1520.3879123111221, 1171.908076642765, 
1070.5837618810237, 1301.1317301782994, 984.7876122212124, 1032.1272978924474, 
1267.1740296618034, 1093.5815877698885, 955.6214179621471, 1559.768895228639], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859998/1000000 [15:32:21<1:46:43, 21.86it/s]global step 860000, trans_decision ep_re 1100.9478079649264

{"global_step": 860000, "eval_re": [1135.1990449106872, 1091.2186015108618, 
964.2855725598762, 1343.8008517661142, 1188.1876541147262, 1562.1578051687625, 
869.2315357510159, 889.7685386203658, 958.5190325316493, 1007.1094427152034], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869998/1000000 [15:43:11<1:38:04, 22.09it/s]global step 870000, trans_decision ep_re 1002.0097732078797

{"global_step": 870000, "eval_re": [974.7964916483339, 1028.0532818152808, 
845.8043651269705, 838.4555391897337, 911.0259491026248, 984.1238469819515, 
1134.5735619896382, 1346.7921596274978, 989.9917924998908, 966.4807440968771], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879997/1000000 [15:54:01<1:31:53, 21.77it/s]global step 880000, trans_decision ep_re 1031.5828511074367

{"global_step": 880000, "eval_re": [1024.9341091108442, 961.5349258540748, 
918.3614426506387, 1033.824324017401, 931.980936264196, 837.2332231544646, 
1052.446688389826, 914.0544287126877, 1093.8928843374872, 1547.5655485827456], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889999/1000000 [16:04:51<1:24:19, 21.74it/s]global step 890000, trans_decision ep_re 1064.3486658817603

{"global_step": 890000, "eval_re": [874.4930856100092, 1308.6104876487343, 
772.8886217390053, 866.1864273319076, 1121.9899854010353, 1079.0256876752212, 
1381.4689941945999, 1315.8576638741058, 944.5905150020942, 978.3751903408905], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [16:15:41<1:16:32, 21.77it/s]global step 900000, trans_decision ep_re 1020.0177667689965

{"global_step": 900000, "eval_re": [986.051100522307, 1013.3122827615878, 
986.4311964967278, 860.4680847079368, 1119.2800718004148, 1087.4763586705647, 
983.1318955962386, 1071.1537408569864, 936.0998735458436, 1156.7730627313567], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909999/1000000 [16:26:31<1:08:59, 21.74it/s]global step 910000, trans_decision ep_re 974.9817988302773

{"global_step": 910000, "eval_re": [1045.9769030226616, 851.8598697340725, 
847.9847323888682, 1141.5285510581202, 1228.0018320773122, 984.8789282191706, 
934.0422498182913, 1003.5554310842238, 933.2529707133046, 778.7365201867486], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919999/1000000 [16:37:21<1:01:16, 21.76it/s]global step 920000, trans_decision ep_re 1162.0547901713555

{"global_step": 920000, "eval_re": [1445.4429075831792, 1324.2164623755384, 
992.7192505651457, 1123.9845380114625, 1051.6263003295878, 1320.7311986368877, 
1247.4429432766788, 907.7232793640883, 1204.2962330258922, 1002.3647885450949], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929999/1000000 [16:48:11<53:38, 21.75it/s]global step 930000, trans_decision ep_re 994.7242998353258

{"global_step": 930000, "eval_re": [1122.0520288173582, 957.7248558522559, 
858.5973938862242, 1188.527722682333, 1035.9291072883482, 979.3307433680538, 
941.7642842031375, 934.4383255857921, 1020.7128206887844, 908.1657159809719], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939999/1000000 [16:59:01<46:05, 21.69it/s]global step 940000, trans_decision ep_re 915.6793545897654

{"global_step": 940000, "eval_re": [740.2573497753592, 957.3469978199282, 
890.635596665523, 1014.6287110072495, 1014.5257135353273, 1031.6911951057084, 
882.6329288858782, 972.8599715985088, 885.1967484980985, 767.0183330060723], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949999/1000000 [17:09:51<38:17, 21.76it/s]global step 950000, trans_decision ep_re 1112.3746923379754

{"global_step": 950000, "eval_re": [1605.4542958783766, 1450.186204135344, 
1079.443984184543, 1011.2113241337898, 1009.8861595756692, 655.9590438757449, 
941.4002775449104, 1203.4813763635789, 1091.9650162829496, 1074.759241404848], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959999/1000000 [17:20:41<30:41, 21.72it/s]global step 960000, trans_decision ep_re 1136.8703470518285

{"global_step": 960000, "eval_re": [948.5097339929962, 1212.4253509624316, 
1318.503096165097, 1201.3939323608454, 1284.682976631639, 1029.3612665352084, 
1284.4765272553825, 1077.5492955967293, 1148.3672654807917, 863.4340255371637], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969999/1000000 [17:31:31<22:59, 21.75it/s]global step 970000, trans_decision ep_re 1202.7980366533325

{"global_step": 970000, "eval_re": [1014.6967735415662, 898.1856528679181, 
1334.4268627896852, 2220.4313098471102, 1599.440729358793, 831.9532071992849, 
1139.6720130563563, 896.4736121525143, 1237.2048494913856, 855.4953562287126], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979999/1000000 [17:42:21<15:19, 21.75it/s]global step 980000, trans_decision ep_re 1145.9545666997678

{"global_step": 980000, "eval_re": [956.9053548657498, 1039.820983267875, 
1182.436615803115, 1171.6869902660903, 933.087574122845, 996.1059699736285, 
1351.4746744399881, 919.5953813915764, 1571.6135050612143, 1336.8186178055953], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989999/1000000 [17:53:11<07:37, 21.84it/s]global step 990000, trans_decision ep_re 1188.499262514205

{"global_step": 990000, "eval_re": [1561.2049768678573, 1249.1022006404542, 
1401.8725546766927, 1276.5371809379171, 921.7722009046158, 1118.9727802008981, 
1054.4178665104985, 1057.3177731976023, 969.5944907542527, 1274.20060045126], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999999/1000000 [18:04:01<00:00, 21.77it/s]global step 1000000, trans_decision ep_re 1053.5900653658857

{"global_step": 1000000, "eval_re": [866.7377667806169, 965.7750919127781, 
1024.617473656743, 1067.27765410892, 1127.0764368027037, 1475.9006460001083, 
1326.9758379575499, 781.9495171762389, 972.4936894629128, 927.0965398002844], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [18:04:41<00:00, 15.37it/s]
