
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.2
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9997/1000000 [03:30<8:12:23, 33.51it/s]global step 10000, trans_decision ep_re -143.85294379295897

{"global_step": 10000, "eval_re": [-127.061601327115, -141.9659707017674, 
-148.6745285275939, -140.23243616433433, -138.7193832699247, 
-141.84453672748816, -119.72387650972635, -148.59175411052234, 
-181.4377323183576, -150.2776182727598], "eval_len": [1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19999/1000000 [10:30<8:15:55, 32.94it/s]global step 20000, trans_decision ep_re 71.93250214810804

{"global_step": 20000, "eval_re": [-36.25438277904673, -25.947077657922485, 
190.31585366927843, -109.74608168100445, -147.68516400354105, 
339.69943593936955, 20.01285554788091, 293.5042926310358, -36.689640916877856, 
232.1149307319083], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
1000, 1000]}

  3%|▎         | 29999/1000000 [17:30<8:10:24, 32.97it/s]global step 30000, trans_decision ep_re 298.70789793615023

{"global_step": 30000, "eval_re": [732.6305712221719, 52.38405749753023, 
190.58643344435018, 561.7548657129604, 339.8351083936662, 609.8214429655307, 
170.51027065925192, 37.09339532376807, 107.31373415919072, 185.14909998308278], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39997/1000000 [24:30<8:05:46, 32.94it/s]global step 40000, trans_decision ep_re 449.89328771748177

{"global_step": 40000, "eval_re": [664.0794672345305, 353.16635594652445, 
647.9138860940872, 361.6706232124804, 470.4588805450468, 454.9255502678662, 
551.2222165922889, 291.7980309344566, 502.5978738980021, 201.0999924495341], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49997/1000000 [31:30<7:58:26, 33.09it/s]global step 50000, trans_decision ep_re 645.0049030366789

{"global_step": 50000, "eval_re": [679.492465900848, 770.9546477098046, 
515.7801251527849, 538.9183103036528, 684.9401829437918, 681.4017643236272, 
720.7243640385731, 606.8740045664371, 634.741027411345, 616.2221380159242], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59999/1000000 [38:30<7:56:34, 32.87it/s]global step 60000, trans_decision ep_re 889.3083479067179

{"global_step": 60000, "eval_re": [913.3396848668036, 803.6921216103809, 
1119.3295187934566, 777.8709553471413, 1338.8777204068992, 797.266500195972, 
722.3761115122015, 920.1736760849111, 737.831253204826, 762.3259370445865], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69997/1000000 [45:30<7:43:45, 33.42it/s]global step 70000, trans_decision ep_re 909.4351358030887

{"global_step": 70000, "eval_re": [629.1651194301867, 872.4107140992967, 
775.138632503206, 982.5274237412567, 780.626403578244, 1167.2279697007382, 
771.5406904126917, 975.8208476000901, 1289.5193114118174, 850.374245553359], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79999/1000000 [52:30<7:44:51, 32.98it/s]global step 80000, trans_decision ep_re 757.21997575423

{"global_step": 80000, "eval_re": [816.5828657113478, 811.9445066173505, 
848.1114758125447, 628.938219353289, 697.9036468862718, 669.657261469446, 
986.8262736480777, 795.1375135204876, 641.3506666944573, 675.7473278290266], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89997/1000000 [59:30<7:40:29, 32.94it/s]global step 90000, trans_decision ep_re 881.3231700347663

{"global_step": 90000, "eval_re": [877.1455126768961, 593.4791142779467, 
1027.2161820756733, 789.5690234077869, 764.8205165941577, 735.0034440752825, 
929.1427491898797, 725.451711149851, 1329.8048023058673, 1041.5986445943217], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99997/1000000 [1:06:30<7:34:20, 33.01it/s]global step 100000, trans_decision ep_re 804.8967828344576

{"global_step": 100000, "eval_re": [763.1787756875933, 856.5741237468981, 
728.5129409198205, 978.2172085637391, 679.7842228880093, 630.4321084629818, 
775.1964745132548, 929.9480378404687, 916.0249386737412, 791.0989970480688], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109997/1000000 [1:13:30<7:30:26, 32.93it/s]global step 110000, trans_decision ep_re 987.602971768472

{"global_step": 110000, "eval_re": [819.0445026542861, 991.6250376856784, 
948.8309470822603, 689.6339426481283, 1222.4011441728394, 946.4596424592634, 
967.5529929044685, 1010.6692040258368, 1564.3461715922792, 715.46613245968], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119997/1000000 [1:20:30<7:27:21, 32.79it/s]global step 120000, trans_decision ep_re 823.0056137390314

{"global_step": 120000, "eval_re": [853.9984989435902, 816.3449682989319, 
976.1426273392825, 669.5796279537086, 797.0214653355231, 816.0996498048622, 
810.9895305735974, 913.6714834713565, 748.6740784726139, 827.5342071968482], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129997/1000000 [1:27:30<7:18:47, 33.05it/s]global step 130000, trans_decision ep_re 970.0785757279484

{"global_step": 130000, "eval_re": [801.4995732154325, 1142.218611151746, 
1120.1922859610643, 1159.2513100547988, 767.8082533439355, 811.0878189605838, 
949.6301059587543, 1119.8249086058736, 777.8986175612422, 1051.3742724660533], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139999/1000000 [1:34:30<7:15:05, 32.94it/s]global step 140000, trans_decision ep_re 940.7793245684067

{"global_step": 140000, "eval_re": [1012.7847520950944, 947.220066646454, 
690.7193921128983, 1025.7148406732313, 1011.5785226272276, 967.041034878506, 
935.4189511996276, 1348.9609385766403, 755.8381453373152, 712.5166015370703], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149999/1000000 [1:41:30<7:11:07, 32.86it/s]global step 150000, trans_decision ep_re 972.2407385055956

{"global_step": 150000, "eval_re": [939.1764071222614, 790.0054283543113, 
1344.0576554814018, 995.3445657415566, 954.7656118015102, 689.5873387923856, 
1095.3844338319445, 1014.3843552068488, 996.3531144595396, 903.3484742641953], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159999/1000000 [1:48:30<7:05:09, 32.93it/s]global step 160000, trans_decision ep_re 1164.7812889655002

{"global_step": 160000, "eval_re": [1176.1172815278542, 862.762555458946, 
1584.4049010867848, 1148.0222242838938, 1069.8923798716485, 822.2901300325609, 
981.6728185596742, 1495.4183708884455, 1314.8819443510565, 1192.3502835941395], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169997/1000000 [1:55:20<6:55:10, 33.32it/s]global step 170000, trans_decision ep_re 959.546608945219

{"global_step": 170000, "eval_re": [951.5764870334983, 588.6834154887996, 
834.285552674245, 1149.3322969596118, 1005.42508461704, 1023.099661972178, 
815.9630439083494, 1243.6125318544073, 923.4184127173367, 1060.069602226724], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179999/1000000 [2:02:20<6:49:12, 33.40it/s]global step 180000, trans_decision ep_re 999.9408849519581

{"global_step": 180000, "eval_re": [1373.227503013259, 917.7394066837909, 
864.1808551738383, 1328.288096575746, 1273.731518960101, 1013.5168963846171, 
712.8824200853577, 722.0464839428977, 1038.704413370202, 755.091255329771], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189999/1000000 [2:09:20<6:42:02, 33.58it/s]global step 190000, trans_decision ep_re 916.449973253672

{"global_step": 190000, "eval_re": [909.0274899947912, 755.446752718891, 
1035.8562869223078, 836.5172599857925, 1050.8170870176878, 883.648946085815, 
1153.9986939548362, 853.8584763421355, 933.1349934396732, 752.19374607479], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199999/1000000 [2:16:10<6:36:21, 33.64it/s]global step 200000, trans_decision ep_re 1182.7199663391243

{"global_step": 200000, "eval_re": [1029.9263693768107, 875.7194568933745, 
1118.5577837087062, 1568.737795123357, 1224.957352525243, 1088.6504986706502, 
991.9760292021588, 1214.6626384417134, 1120.300393265746, 1593.7113461834817], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209999/1000000 [2:23:10<6:34:03, 33.41it/s]global step 210000, trans_decision ep_re 1080.9574749177132

{"global_step": 210000, "eval_re": [1483.2410332989816, 814.331366807998, 
1426.4168853433875, 1189.9721650827187, 910.4112357350724, 859.4708347502179, 
1115.7320344124983, 1316.6329133173815, 888.9263273792833, 804.4399530495925], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219999/1000000 [2:30:10<6:26:45, 33.61it/s]global step 220000, trans_decision ep_re 1055.0478655322897

{"global_step": 220000, "eval_re": [1030.781459239653, 770.652909733929, 
836.6405365796534, 912.3870800576088, 1056.2155000237753, 1292.0664868473484, 
908.2469424202163, 1419.4262479566514, 888.8813020632664, 1435.1801904007953], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229999/1000000 [2:37:00<6:22:50, 33.52it/s]global step 230000, trans_decision ep_re 980.3361873320279

{"global_step": 230000, "eval_re": [925.1647231397915, 923.1819338631389, 
1008.8761000020787, 741.9538056163268, 813.4141112499078, 1233.9333215174693, 
1371.679639265808, 953.663327530329, 850.0469056072144, 981.448005528214], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239999/1000000 [2:44:00<6:20:20, 33.30it/s]global step 240000, trans_decision ep_re 1201.3534287900286

{"global_step": 240000, "eval_re": [906.0116644115914, 1337.9280977545175, 
996.5701184678112, 928.5403647265867, 1649.5619269899078, 1383.3116940721434, 
1782.9216532507714, 949.3081694565715, 1038.9413353073937, 1040.4392634629905], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249999/1000000 [2:51:00<6:19:10, 32.97it/s]global step 250000, trans_decision ep_re 963.9603674512057

{"global_step": 250000, "eval_re": [1479.23522025365, 694.0973595444627, 
1125.7880927340075, 899.5729250739228, 972.5327813637231, 313.227990981908, 
1187.4488944906066, 816.5820709046717, 1123.507523095993, 1027.6108160691108], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259998/1000000 [2:58:00<6:08:52, 33.44it/s]global step 260000, trans_decision ep_re 1020.5317972645989

{"global_step": 260000, "eval_re": [797.2467055344298, 947.1299947447043, 
1088.540985503295, 876.4032063137273, 1195.1681462393674, 820.7602407059759, 
1058.1987075736197, 946.1568177875995, 1414.285744234843, 1061.4274240084262], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269999/1000000 [3:05:00<6:05:27, 33.29it/s]global step 270000, trans_decision ep_re 1025.4095498221477

{"global_step": 270000, "eval_re": [997.2585556593672, 1082.7896034548874, 
746.1703498687039, 1133.8904056490355, 1165.3981486198604, 874.9469019726195, 
911.4420556206117, 827.3530720155464, 1503.8342561191444, 1011.0121492417016], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279999/1000000 [3:12:00<6:00:20, 33.30it/s]global step 280000, trans_decision ep_re 1126.8417432379497

{"global_step": 280000, "eval_re": [1240.7326670974644, 1068.9099756440187, 
1526.102286397102, 1028.1901917201567, 1304.5644838010267, 1170.0189848005632, 
964.3982949591223, 1202.9531373619627, 757.7661908922242, 1004.7812197058602], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289999/1000000 [3:19:00<5:58:48, 32.98it/s]global step 290000, trans_decision ep_re 1043.6924394099312

{"global_step": 290000, "eval_re": [982.8638050937517, 979.6906142728269, 
951.3537744258279, 1230.0899575307558, 818.2603271456651, 1066.5044170282288, 
1353.6037631085335, 1130.7508039472632, 991.0908094627332, 932.7161220837263], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299999/1000000 [3:26:00<5:54:38, 32.90it/s]global step 300000, trans_decision ep_re 1008.5284803437326

{"global_step": 300000, "eval_re": [715.7909721068025, 1232.0247207559241, 
1018.0183125251486, 866.2513975875839, 865.892622051274, 974.3504336249867, 
1072.3762975835587, 1316.1503491007286, 1086.1378361571083, 938.2918619442099], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309999/1000000 [3:32:50<5:44:01, 33.43it/s]global step 310000, trans_decision ep_re 989.5485369449373

{"global_step": 310000, "eval_re": [989.9820115837636, 1100.7250302523498, 
912.4327330352826, 1114.704189758967, 1063.3079173260394, 954.3781143275185, 
1061.8342018992626, 312.98231314394263, 895.6132177717514, 1489.525640350496], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319999/1000000 [3:39:50<5:38:51, 33.45it/s]global step 320000, trans_decision ep_re 1015.1408356263253

{"global_step": 320000, "eval_re": [1306.3957355164002, 798.4148930458449, 
1122.1792638914042, 1257.9161978370435, 1039.1052292901911, 976.0677512652916, 
999.1749895607909, 766.0615360306474, 933.5862557825509, 952.5065040430868], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329999/1000000 [3:46:50<5:38:54, 32.95it/s]global step 330000, trans_decision ep_re 1214.1344884881814

{"global_step": 330000, "eval_re": [1446.9290506761406, 1166.677813012447, 
1389.6688822486344, 1002.9016549932228, 1367.703190566635, 894.5139982292263, 
1076.4389922124476, 1195.9656431675467, 1345.1597103632064, 1255.385949412309], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339999/1000000 [3:53:50<5:29:16, 33.41it/s]global step 340000, trans_decision ep_re 979.9015605326031

{"global_step": 340000, "eval_re": [790.8614216062782, 783.2284555384875, 
1498.0111717859756, 839.6299414766495, 1014.9307244990881, 850.810548443907, 
1082.0296837648489, 880.1656906481238, 1091.9298592476794, 967.4181083149917], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349999/1000000 [4:00:50<5:28:29, 32.98it/s]global step 350000, trans_decision ep_re 1110.6840721082422

{"global_step": 350000, "eval_re": [1554.5090050985689, 881.8315907175873, 
1109.9773567040224, 901.0880462200506, 1372.9850367096055, 951.7939621495053, 
1255.548206889076, 793.9644015759972, 1120.860964974813, 1164.2821500431962], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359999/1000000 [4:07:50<5:21:05, 33.22it/s]global step 360000, trans_decision ep_re 964.4640123387842

{"global_step": 360000, "eval_re": [818.2938300264037, 1129.2557509140352, 
991.6224242551823, 789.571488631153, 925.7163955403513, 925.2669899458001, 
1180.8118715085113, 1202.6496479329614, 834.4254699913567, 847.0262546420864], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369999/1000000 [4:14:50<5:17:16, 33.09it/s]global step 370000, trans_decision ep_re 992.4257414600464

{"global_step": 370000, "eval_re": [688.3167638118641, 1296.5958887622048, 
805.5090107920292, 838.1310878532274, 1051.5933408363903, 748.8619610207827, 
1290.1934072572787, 1035.6460358870597, 910.6528175828084, 1258.7571007968186], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379999/1000000 [4:21:50<5:13:55, 32.92it/s]global step 380000, trans_decision ep_re 1120.5836030974958

{"global_step": 380000, "eval_re": [800.9646632349475, 1449.294926205784, 
1265.3299617421937, 1012.611747396648, 828.1699828591962, 1070.318459020856, 
1315.9223741860428, 835.1493254731887, 1370.4005591210605, 1257.6740317350404], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389998/1000000 [4:28:50<5:03:56, 33.45it/s]global step 390000, trans_decision ep_re 1241.285701685387

{"global_step": 390000, "eval_re": [1344.062246787425, 1123.0332398384949, 
1377.775552492544, 1149.2208141794886, 1043.6413664353267, 1326.7706936147354, 
748.969843485185, 1269.60181374529, 1294.1904262022244, 1735.5910200731573], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [4:35:50<4:58:15, 33.53it/s]global step 400000, trans_decision ep_re 1095.30798077189

{"global_step": 400000, "eval_re": [804.4205763198047, 1066.0247854954257, 
1244.956660702227, 1120.0124600103827, 1109.726874721016, 905.77794759437, 
1516.200783780918, 961.287103083704, 1171.0413892771476, 1053.6312267339054], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409999/1000000 [4:42:50<4:58:18, 32.96it/s]global step 410000, trans_decision ep_re 1132.8326629519702

{"global_step": 410000, "eval_re": [814.0960390163082, 915.2972714987521, 
966.4003966727552, 1363.1751699460497, 1097.3751982387455, 1381.8158579242188, 
1279.740838310858, 1407.651021696863, 767.4928012518747, 1335.282034963277], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419999/1000000 [4:49:50<4:51:12, 33.19it/s]global step 420000, trans_decision ep_re 966.7127248272158

{"global_step": 420000, "eval_re": [1303.5106099344553, 776.9406168687174, 
732.0708059799678, 855.6999472812207, 1201.4901303947424, 1102.7679319391782, 
805.3189017398086, 1210.3997521479944, 729.1574158300747, 949.7711361559968], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429999/1000000 [4:56:50<4:48:37, 32.91it/s]global step 430000, trans_decision ep_re 967.2948694910585

{"global_step": 430000, "eval_re": [784.3602427062224, 1082.5832446357804, 
1029.6989162233324, 752.560226841615, 830.0182450159274, 1161.310837488073, 
787.3652961965253, 1265.4871299803226, 1276.064467216077, 703.5000886067093], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439997/1000000 [5:03:50<4:42:49, 33.00it/s]global step 440000, trans_decision ep_re 930.7507928915923

{"global_step": 440000, "eval_re": [942.991178576864, 1195.583121952973, 
1149.8269291225579, 785.9309607533468, 1002.9602640790279, 608.7033861658055, 
758.3046359093968, 1229.245272690393, 799.1303189915852, 834.831860673973], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449997/1000000 [5:10:50<4:39:19, 32.82it/s]global step 450000, trans_decision ep_re 951.1325117762935

{"global_step": 450000, "eval_re": [986.81109046316, 1054.728781409836, 
802.8363237177077, 1208.5837487901879, 979.8172894116747, 880.6945516854323, 
806.910891158469, 1019.2151934509756, 719.3341461811101, 1052.3931014943803], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459999/1000000 [5:17:50<4:28:03, 33.58it/s]global step 460000, trans_decision ep_re 1162.2223933389355

{"global_step": 460000, "eval_re": [797.1750520339275, 1199.1442283635765, 
1328.509192028458, 1275.9383316218898, 1299.5896135234987, 1005.187203275921, 
1240.1978340718795, 870.1349874466323, 1338.844152315143, 1267.5033387084297], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469999/1000000 [5:24:50<4:27:52, 32.98it/s]global step 470000, trans_decision ep_re 1010.1304059166984

{"global_step": 470000, "eval_re": [977.8667629976177, 1135.4172652420714, 
1321.2492379233588, 1138.7700022006222, 761.9691438067131, 577.6765220915242, 
1030.9306528651123, 1007.9826397640452, 1159.086067284466, 990.3557649914527], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479999/1000000 [5:31:50<4:19:47, 33.36it/s]global step 480000, trans_decision ep_re 1001.0553505071291

{"global_step": 480000, "eval_re": [846.0864544274986, 1148.5032186511992, 
1119.6514008366905, 790.4268399606043, 753.5165468625353, 1148.2086442651987, 
1021.8428716507584, 1196.8759041752314, 860.2307837437266, 1125.210840497848], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489999/1000000 [5:38:50<4:16:14, 33.17it/s]global step 490000, trans_decision ep_re 1095.3098899240472

{"global_step": 490000, "eval_re": [844.4449406734676, 787.5884241240801, 
1184.5714743169067, 1203.0983449482912, 1358.4368500522908, 758.8417607988213, 
961.7474226608981, 1180.851608693172, 1173.8973003141637, 1499.6207726583812], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499999/1000000 [5:45:50<4:09:44, 33.37it/s]global step 500000, trans_decision ep_re 1048.4434867265313

{"global_step": 500000, "eval_re": [1140.147257064128, 913.6519408775707, 
1390.0475802641863, 803.2614515928487, 1065.863383802313, 1294.4342604689184, 
750.4531384739272, 1099.1284252058917, 992.6218979756068, 1034.8255315399242], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509999/1000000 [5:52:50<4:08:08, 32.91it/s]global step 510000, trans_decision ep_re 991.205921089528

{"global_step": 510000, "eval_re": [843.2005231912713, 1228.3610017422523, 
1134.7703952331362, 860.8064194248255, 741.2408949938477, 1056.3293561543678, 
1073.8602091828177, 733.6718142648509, 1318.7734096340798, 921.0451870738326], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519996/1000000 [5:59:50<4:00:17, 33.29it/s]global step 520000, trans_decision ep_re 1050.3338529148969

{"global_step": 520000, "eval_re": [740.2321155587719, 1395.3155859943497, 
1002.1780834665736, 1178.561736752672, 826.4475006894397, 1293.674890094302, 
1050.4837208320064, 956.8826248747467, 1255.9569740937288, 803.6052967923777], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529999/1000000 [6:07:00<3:57:49, 32.94it/s]global step 530000, trans_decision ep_re 1042.057704753605

{"global_step": 530000, "eval_re": [781.1540279785528, 1166.2858002694647, 
1166.5828866273953, 1256.1371253028292, 1131.8159594984522, 901.9100813157181, 
1328.2625063493974, 799.245593248311, 948.5179389890807, 940.6651279568509], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539998/1000000 [6:14:00<3:51:31, 33.11it/s]global step 540000, trans_decision ep_re 1017.8485606088436

{"global_step": 540000, "eval_re": [1108.3763767441642, 908.6799994743682, 
895.2441843197415, 997.4390051886678, 1424.32553309467, 967.3587862667069, 
1277.4071006768236, 850.1531677205452, 842.708315104521, 906.7931374982298], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549999/1000000 [6:21:00<3:47:59, 32.90it/s]global step 550000, trans_decision ep_re 855.2544516675198

{"global_step": 550000, "eval_re": [888.0560972737156, 708.0762545368174, 
716.59442950319, 953.7351001939384, 839.1489963085079, 759.9682915788371, 
918.3992547809244, 780.397790577178, 834.9454152239997, 1153.222886698089], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559999/1000000 [6:28:00<3:40:54, 33.20it/s]global step 560000, trans_decision ep_re 1162.2773593569198

{"global_step": 560000, "eval_re": [792.7544164982262, 1265.5682826115387, 
1160.7612948502633, 1305.5319720215437, 1168.8380938922076, 1195.4154603457098, 
1379.5288780398828, 1232.728070712687, 1367.6889373227953, 753.9581872743443], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569999/1000000 [6:34:50<3:33:35, 33.55it/s]global step 570000, trans_decision ep_re 912.9808788151119

{"global_step": 570000, "eval_re": [1487.163781021747, 754.9349515360387, 
790.1753900479547, 897.4371092213443, 907.9546251066333, 982.5425372732716, 
699.4839759640396, 887.7128630241077, 708.7749834725676, 1013.6285714834133], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579999/1000000 [6:41:50<3:29:21, 33.43it/s]global step 580000, trans_decision ep_re 963.2035372465946

{"global_step": 580000, "eval_re": [952.5836901677176, 765.1595098167302, 
1089.9441127729756, 1184.992658207522, 798.9539820940269, 973.329338517172, 
820.0832799982234, 940.9124250255961, 1133.6072643717912, 972.4691114941915], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589999/1000000 [6:48:50<3:23:27, 33.59it/s]global step 590000, trans_decision ep_re 1105.4857364117383

{"global_step": 590000, "eval_re": [1470.6375351343545, 1663.1636521710197, 
789.5011456346697, 851.0128255688315, 1390.0282559992256, 1000.4048167244302, 
825.2441319606882, 834.6145463738203, 806.4809739372348, 1423.769480613112], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599999/1000000 [6:55:40<3:22:29, 32.92it/s]global step 600000, trans_decision ep_re 937.3905117452277

{"global_step": 600000, "eval_re": [685.4153407468508, 772.2387153836271, 
1232.3371377541841, 1110.762053102486, 1191.023164712542, 757.1554941960432, 
806.9684021726608, 788.3450131706076, 1257.8241024230838, 771.8356937901934], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609999/1000000 [7:02:40<3:17:23, 32.93it/s]global step 610000, trans_decision ep_re 1145.5500795495977

{"global_step": 610000, "eval_re": [929.9091518620588, 1277.9962485541253, 
1170.7210072477528, 923.1751221853956, 956.942532540903, 1421.9365573148248, 
1131.9018116732746, 1147.336695258062, 1650.2964868346487, 845.2851820249329], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619999/1000000 [7:09:40<3:09:00, 33.51it/s]global step 620000, trans_decision ep_re 1008.2568881499665

{"global_step": 620000, "eval_re": [1155.2192454709239, 1343.562772231189, 
709.9452884190825, 1270.0990154867957, 1203.656019510891, 626.9916346741375, 
1030.2263127443377, 1257.815450035306, 825.3565960454463, 659.6965468815548], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629999/1000000 [7:16:40<3:03:05, 33.68it/s]global step 630000, trans_decision ep_re 1017.1283421016736

{"global_step": 630000, "eval_re": [791.5361138484084, 1554.6184879659802, 
1177.3187521903717, 889.0761773902052, 1021.1230708477203, 962.5466865279617, 
981.5325171231543, 874.2225362105851, 1064.1820534355231, 855.1270254768275], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639999/1000000 [7:23:40<3:01:21, 33.08it/s]global step 640000, trans_decision ep_re 1046.0432569355632

{"global_step": 640000, "eval_re": [733.4575019874111, 804.1576620208577, 
934.3291319527626, 1157.7070630961416, 1342.2497292588835, 1150.8559855842943, 
1482.4061359067975, 1033.8291791580355, 1145.1932954041772, 676.2468849862729], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649999/1000000 [7:30:30<2:54:08, 33.50it/s]global step 650000, trans_decision ep_re 1036.7177232812417

{"global_step": 650000, "eval_re": [1066.8683335397386, 1310.4255416684468, 
886.83790426671, 1193.4230002992094, 993.4094254347184, 1186.4911236990558, 
845.065585266369, 1067.0818003186805, 1098.814118710316, 718.7603996091732], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659999/1000000 [7:37:30<2:49:46, 33.38it/s]global step 660000, trans_decision ep_re 905.723661167431

{"global_step": 660000, "eval_re": [986.4295184280945, 999.1918917400059, 
1106.7965677188201, 944.6761198507041, 954.2486426905314, 900.3732961783293, 
823.8488334287039, 854.6455869561996, 751.9543593087794, 735.071795374142], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669999/1000000 [7:44:30<2:43:08, 33.71it/s]global step 670000, trans_decision ep_re 946.6391227101097

{"global_step": 670000, "eval_re": [962.549667200944, 1459.5566707512664, 
793.7268994943657, 891.8660203469954, 589.5580568547032, 979.8269918666923, 
820.4174054777309, 1075.0694154505165, 711.0674278652158, 1182.7526717926692], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679999/1000000 [7:51:20<2:40:07, 33.31it/s]global step 680000, trans_decision ep_re 1259.9211640046713

{"global_step": 680000, "eval_re": [1069.9149228254753, 945.0657326441857, 
1388.8028478400493, 1292.6995584193512, 1265.068302565434, 1518.9686826970692, 
1157.6549544676109, 1313.9208971823718, 1437.4550569183596, 1209.6606844868081],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689999/1000000 [7:58:20<2:35:10, 33.30it/s]global step 690000, trans_decision ep_re 1102.9472298084288

{"global_step": 690000, "eval_re": [1489.4702763132173, 1241.7742333638564, 
1110.0548136898587, 1214.297578382784, 1043.7813816210946, 1175.1184771037465, 
873.5059557047143, 1116.4104781244366, 883.3440495060397, 881.7150542745397], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699999/1000000 [8:05:20<2:31:58, 32.90it/s]global step 700000, trans_decision ep_re 921.6339431274433

{"global_step": 700000, "eval_re": [955.4837514546741, 997.124304646022, 
780.6059903024563, 1052.8933786120099, 676.1250061816447, 783.264493870211, 
895.8512514321794, 956.2322442681825, 1136.9275943598618, 981.8314161471917], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709997/1000000 [8:12:20<2:25:07, 33.30it/s]global step 710000, trans_decision ep_re 1277.001028198516

{"global_step": 710000, "eval_re": [843.9354708586899, 1485.7013975876823, 
1165.0746906757477, 877.4881358752326, 1301.8240458337582, 1593.426019797452, 
1481.2584789059993, 1375.2017605290891, 1182.6211516122485, 1463.4791303092597],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719999/1000000 [8:19:10<2:19:52, 33.36it/s]global step 720000, trans_decision ep_re 1070.2467110225407

{"global_step": 720000, "eval_re": [842.181360882631, 1448.6436441605952, 
1050.796295645692, 943.0153121051908, 872.0029863692309, 1013.2760571056197, 
1202.9879345467882, 813.9738163100271, 1433.4961502943354, 1082.0935528052967], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729999/1000000 [8:26:10<2:14:40, 33.41it/s]global step 730000, trans_decision ep_re 1056.0933532279964

{"global_step": 730000, "eval_re": [840.8661427615128, 992.358038853908, 
1239.5177482245233, 817.4683209651088, 1394.2607777594505, 902.5257864141901, 
1147.9299906074953, 1082.6579409058481, 1018.7407375074188, 1124.6080482805107],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739999/1000000 [8:33:10<2:09:23, 33.49it/s]global step 740000, trans_decision ep_re 1106.0450878498036

{"global_step": 740000, "eval_re": [815.7074932807863, 1178.1941162253424, 
1321.5925720016223, 775.2287774235263, 1148.6064739927726, 828.0014828599175, 
859.9693607598949, 1229.935631432376, 1693.26597495425, 1209.9489955675467], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749999/1000000 [8:40:00<2:03:59, 33.60it/s]global step 750000, trans_decision ep_re 947.7663864282864

{"global_step": 750000, "eval_re": [1128.2088599708663, 877.4085098430842, 
834.4441221942786, 927.4382694392245, 1386.664491527876, 786.4815820446794, 
980.8833480495862, 887.3931515440194, 897.9208166085449, 770.8207130607045], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759999/1000000 [8:47:00<1:59:59, 33.34it/s]global step 760000, trans_decision ep_re 1099.0910697547426

{"global_step": 760000, "eval_re": [784.7276617120725, 1078.495391101294, 
1074.781511889339, 985.4944734701606, 1323.8246427425977, 1255.2408187502829, 
1525.2299473003975, 1101.614396084867, 860.9485500589694, 1000.5533044374465], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769999/1000000 [8:54:00<1:54:13, 33.56it/s]global step 770000, trans_decision ep_re 1115.0719643042034

{"global_step": 770000, "eval_re": [1070.8373816521919, 1038.1914831007875, 
1425.0438080730921, 1178.6159855253488, 805.3916138858743, 1136.4786370703432, 
1368.2752434694721, 836.3852319280911, 1003.3969723838175, 1288.1032859530133], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779999/1000000 [9:01:00<1:49:43, 33.42it/s]global step 780000, trans_decision ep_re 953.2577803881193

{"global_step": 780000, "eval_re": [850.1947303194818, 969.5476007999188, 
849.3675264515616, 730.2744717919416, 1064.0857198488306, 1410.4764829359713, 
804.4050717802248, 779.8520308916196, 1090.0159283244038, 984.35824073724], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789999/1000000 [9:08:00<1:44:19, 33.55it/s]global step 790000, trans_decision ep_re 1007.0289337595084

{"global_step": 790000, "eval_re": [599.8968209243587, 763.8389508862997, 
1104.2603441365936, 1269.3820781995394, 883.0083423916838, 1357.960440025797, 
1126.6399741755367, 1038.4290479765104, 1114.7388619015276, 812.1344769772372], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799999/1000000 [9:14:50<1:39:04, 33.64it/s]global step 800000, trans_decision ep_re 981.7784566953844

{"global_step": 800000, "eval_re": [757.5856792314426, 851.7265281326626, 
924.7145532247388, 1294.4407276209995, 995.816274975557, 1008.4343045996158, 
718.2045782490442, 1079.7819251908616, 736.8469491919808, 1450.233046536942], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809999/1000000 [9:21:50<1:35:01, 33.33it/s]global step 810000, trans_decision ep_re 985.6630166644952

{"global_step": 810000, "eval_re": [777.5082853470043, 1133.3555976961668, 
979.5643282599716, 961.3207598197769, 984.7003712750422, 912.8914260732518, 
1003.1358726821312, 960.5566126185523, 891.9676975597298, 1251.6292153133252], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819999/1000000 [9:28:50<1:29:20, 33.58it/s]global step 820000, trans_decision ep_re 963.6545981099295

{"global_step": 820000, "eval_re": [722.4020576237231, 863.6533705649488, 
772.5419085424281, 739.1321581742076, 963.5546899363941, 937.3915558932495, 
1245.5236950233038, 1047.1015954183367, 1526.5919511040127, 818.6529988186899], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829999/1000000 [9:35:50<1:26:04, 32.92it/s]global step 830000, trans_decision ep_re 1111.1519201845713

{"global_step": 830000, "eval_re": [1031.935181149801, 1147.3139173898153, 
988.1068945071931, 1286.4340270138089, 824.2989689647064, 1299.4859029622105, 
1066.164550921254, 924.7910628134802, 1414.2416811296466, 1128.747014993799], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839999/1000000 [9:42:50<1:19:56, 33.36it/s]global step 840000, trans_decision ep_re 870.1349676785288

{"global_step": 840000, "eval_re": [765.2768573559975, 897.2049329322583, 
825.2872250214293, 754.2875084733286, 711.0689044349834, 993.5490591634499, 
748.1504645221295, 800.0735091383311, 838.8246244086187, 1367.6265913347606], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849999/1000000 [9:49:50<1:15:40, 33.03it/s]global step 850000, trans_decision ep_re 1067.2424773044418

{"global_step": 850000, "eval_re": [891.5938331757723, 774.6703655149361, 
1219.0923289647128, 1173.8055702044926, 951.1291044709167, 1052.8149356320957, 
1110.3105869683452, 1057.0214959933521, 1301.316352568049, 1140.6701995517426], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859999/1000000 [9:56:40<1:09:59, 33.33it/s]global step 860000, trans_decision ep_re 1103.7819169055213

{"global_step": 860000, "eval_re": [1247.8141754394092, 935.8956931990411, 
922.9039113767568, 1080.5723187636554, 1472.5084198588502, 1236.781969989531, 
1577.705004522588, 758.1588111564344, 964.8083189901066, 840.6705457588389], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869999/1000000 [10:03:40<1:04:29, 33.60it/s]global step 870000, trans_decision ep_re 1019.5294055609171

{"global_step": 870000, "eval_re": [785.9838177869166, 1363.9214477273388, 
785.1745644870459, 954.5117373001749, 860.798067238039, 870.5574800143135, 
1257.300922700771, 1136.2072577220176, 1037.761884721177, 1143.0768759113755], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879999/1000000 [10:10:40<59:28, 33.63it/s]global step 880000, trans_decision ep_re 1019.762969007564

{"global_step": 880000, "eval_re": [886.6555926871197, 870.2649025377066, 
1077.045103151945, 1283.7828356108494, 837.6442492387534, 850.6122856098518, 
915.8365082112537, 1124.7659405128595, 1358.8904313876671, 992.1318411276349], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889999/1000000 [10:17:30<54:37, 33.56it/s]global step 890000, trans_decision ep_re 933.2412904831656

{"global_step": 890000, "eval_re": [1044.3093652590958, 801.4501177741909, 
721.6762398663341, 1030.1220221447238, 943.1663318478982, 1312.7905833211985, 
1016.4549660493719, 819.6011927247248, 865.5943614782, 777.247724365918], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [10:24:30<50:40, 32.89it/s]global step 900000, trans_decision ep_re 1098.3826598948037

{"global_step": 900000, "eval_re": [775.2581595320662, 1101.73663356765, 
949.9117961075425, 1570.6775737063624, 1080.3504764503382, 943.7475258830681, 
1357.3262533827728, 1476.1254335497724, 813.9561997731979, 914.7365469952648], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909999/1000000 [10:31:30<45:29, 32.98it/s]global step 910000, trans_decision ep_re 958.2127153199441

{"global_step": 910000, "eval_re": [858.5519281307505, 1108.881945061812, 
1007.8594743973495, 1042.6367068538848, 762.9683751109446, 767.5344671382157, 
862.7359421246899, 800.0726683865153, 1375.5006709367647, 995.384975058514], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919999/1000000 [10:38:30<40:10, 33.19it/s]global step 920000, trans_decision ep_re 1074.8963108589971

{"global_step": 920000, "eval_re": [1011.8696669457213, 1210.4901804263127, 
924.6647888817527, 797.0703666447804, 959.457714128853, 1397.8691735059058, 
1076.8100312451238, 840.2497407210004, 1356.7041167323625, 1173.7773293581604], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929999/1000000 [10:45:30<35:29, 32.88it/s]global step 930000, trans_decision ep_re 1143.4346839015009

{"global_step": 930000, "eval_re": [1120.1197184149505, 1161.103845369502, 
1060.3689114572605, 1215.8205993304623, 1154.370922712999, 789.4919372339901, 
768.3452981739736, 1236.0510280714113, 1278.713275446471, 1649.961302803987], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939999/1000000 [10:52:30<30:10, 33.14it/s]global step 940000, trans_decision ep_re 945.8596110520126

{"global_step": 940000, "eval_re": [1142.7093545833864, 1094.322796412153, 
1043.9331724438487, 829.2580303887449, 925.3182772692087, 726.3511596077037, 
934.3390860076292, 709.9452498288143, 1013.2102842428292, 1039.2086997358097], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949999/1000000 [10:59:30<24:53, 33.47it/s]global step 950000, trans_decision ep_re 1065.9571952533108

{"global_step": 950000, "eval_re": [986.7806652545029, 1022.5792462282222, 
945.2382228049, 962.0987812680085, 1312.2961786124233, 817.1930582078135, 
1032.1190360023325, 1026.7579380125358, 1286.368272274988, 1268.1405538673812], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959999/1000000 [11:06:30<20:01, 33.30it/s]global step 960000, trans_decision ep_re 922.5910739395683

{"global_step": 960000, "eval_re": [1569.5612040047208, 1097.0902742613494, 
821.2201414923733, 931.8687268427475, -141.2654333605483, 906.5968316776615, 
822.6816369986224, 1006.8249442168558, 1198.4766405659523, 1012.8557726959507], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969999/1000000 [11:13:30<15:05, 33.12it/s]global step 970000, trans_decision ep_re 1041.4605472234884

{"global_step": 970000, "eval_re": [1187.2430865716108, 962.541438873, 
939.9146251629683, 821.0938007437258, 1013.2184670252824, 1736.976852082037, 
1126.4009021246495, 770.7767478130572, 752.0711846570869, 1104.3683671814667], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979999/1000000 [11:20:20<10:08, 32.85it/s]global step 980000, trans_decision ep_re 945.5744970865753

{"global_step": 980000, "eval_re": [1097.284050143008, 885.0942071837158, 
685.5731414017204, 843.5445704299742, 1383.5363766388245, 972.4087481277779, 
686.1699226849328, 688.6033435422192, 795.4637148536625, 1418.0668958599178], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989997/1000000 [11:27:20<05:03, 32.98it/s]global step 990000, trans_decision ep_re 1068.0497393330497

{"global_step": 990000, "eval_re": [824.5143048131414, 1445.2380958635074, 
1006.3308074613042, 917.3005563637345, 1053.6698852738373, 995.3270874237528, 
1163.8132706104689, 1005.5532833709925, 743.5835332744867, 1525.1665688752726], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999999/1000000 [11:34:20<00:00, 32.96it/s]global step 1000000, trans_decision ep_re 987.8562570113136

{"global_step": 1000000, "eval_re": [1095.438767976783, 708.913379824478, 
1281.1658907339279, 980.8820213978169, 1004.2960527113449, 1093.4625219493792, 
802.225351905681, 895.5448991158498, 865.6281975284708, 1151.0054869694038], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [11:34:46<00:00, 23.99it/s]
