
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.2
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [05:30<13:08:42, 20.92it/s]global step 10000, trans_decision ep_re -190.39344561366613

{"global_step": 10000, "eval_re": [-183.26713550656964, -127.22347562027976, 
-225.18719575009405, -206.27960865947117, -151.9377280294499, -149.893263486876,
-204.84316423841062, -204.34579724894527, -231.37400151178463, 
-219.58308608478023], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

  2%|▏         | 19999/1000000 [16:50<12:55:31, 21.06it/s]global step 20000, trans_decision ep_re -39.63279002421835

{"global_step": 20000, "eval_re": [38.62113493747876, -45.71416119272867, 
-58.674931456617124, -43.29314400675999, -69.55701239653023, -35.77791688915717,
25.75251635753328, -125.77141339670256, -43.959246562065154, 
-37.953725636634594], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

  3%|▎         | 29998/1000000 [28:00<12:50:26, 20.98it/s]global step 30000, trans_decision ep_re 120.6326240875467

{"global_step": 30000, "eval_re": [-30.833693221272984, 81.87813817461189, 
232.2946413328136, 161.45893558314032, 42.00988612290449, 66.25190201825727, 
152.88675038864343, 219.28621094491083, 165.07390665678054, 116.01956287467759],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39999/1000000 [39:10<12:33:53, 21.22it/s]global step 40000, trans_decision ep_re 550.6933725154113

{"global_step": 40000, "eval_re": [516.9913058995855, 547.3309087964836, 
506.42899661612074, 527.4461838935808, 585.3997042005788, 559.9119889995839, 
719.5636933138396, 374.7132720954324, 473.71212229883116, 695.4355490400762], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49998/1000000 [50:20<12:20:08, 21.39it/s]global step 50000, trans_decision ep_re 785.7978865347625

{"global_step": 50000, "eval_re": [927.8721994793619, 684.2910327023537, 
604.3579644655757, 889.7706891708166, 846.7277251909899, 579.5771095696571, 
922.7807543956513, 859.4417547671943, 735.9447635924868, 807.2148720135375], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59998/1000000 [1:01:30<12:15:00, 21.31it/s]global step 60000, trans_decision ep_re 803.7769708085345

{"global_step": 60000, "eval_re": [991.833330050209, 671.1741025857825, 
871.3893329696357, 770.6215947133381, 941.8641404169124, 631.5528562638291, 
775.0225828241375, 802.1939500346513, 674.2157723564379, 907.902045870412], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69999/1000000 [1:12:50<12:16:25, 21.05it/s]global step 70000, trans_decision ep_re 856.9115937954896

{"global_step": 70000, "eval_re": [623.8338397784452, 615.1186094818046, 
36.83642224637792, 1566.8515576554003, 1098.3104743506428, 866.8666369753279, 
654.9843581560397, 639.6630485806834, 1416.202149563833, 1050.44884116634], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79999/1000000 [1:24:00<12:01:26, 21.25it/s]global step 80000, trans_decision ep_re 1017.7414246256752

{"global_step": 80000, "eval_re": [1278.5485262960797, 1000.7758413996625, 
1072.1747996150725, 1501.8635515559965, 895.1517593464744, 634.9580308910475, 
1284.0296518940545, 1000.0994805868098, 737.3360349925348, 772.4765696790207], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89998/1000000 [1:35:00<11:43:10, 21.57it/s]global step 90000, trans_decision ep_re 1177.8340313835467

{"global_step": 90000, "eval_re": [961.134177762012, 1023.858905919642, 
1251.7653236953527, 1371.1831174490455, 1032.0044181038563, 1557.92539168741, 
1750.3746983592914, 826.489824300797, 1071.208723448976, 932.3957331090859], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99999/1000000 [1:46:00<11:42:14, 21.36it/s]global step 100000, trans_decision ep_re 920.7766877922893

{"global_step": 100000, "eval_re": [927.8569836533976, 994.9999981296475, 
1093.0483521865901, 772.5239785168752, 963.9843545619716, 1031.7615578274513, 
672.5330259625275, 1055.8681578210237, 887.1288715118062, 808.0615977516021], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109997/1000000 [1:57:00<11:30:12, 21.49it/s]global step 110000, trans_decision ep_re 910.159236212633

{"global_step": 110000, "eval_re": [688.6013641841132, 1114.1089733089714, 
1039.2836815329217, 728.6294375382967, 1109.1112901892736, 673.5765485688424, 
763.6449001499496, 1075.5161923265512, 1227.2188015844667, 681.9011727429441], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119999/1000000 [2:08:00<11:21:19, 21.53it/s]global step 120000, trans_decision ep_re 729.4232336000939

{"global_step": 120000, "eval_re": [587.2749332964501, 729.847891303322, 
581.7653667751079, 728.6510032086823, 905.4516038929883, 690.7907953205615, 
594.0613208000572, 676.9732053110662, 1001.661816961302, 797.7543991314026], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129999/1000000 [2:19:00<11:12:14, 21.57it/s]global step 130000, trans_decision ep_re 720.194765977704

{"global_step": 130000, "eval_re": [666.1921047402465, 802.0825082868342, 
723.4854621312412, 834.5402344582861, 662.7209930897491, 674.5540237631712, 
647.016061635839, 772.1338893724043, 616.1337255088547, 803.0886567904147], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139998/1000000 [2:30:00<11:03:22, 21.61it/s]global step 140000, trans_decision ep_re 850.2435119084164

{"global_step": 140000, "eval_re": [1126.37250284989, 673.4981714493244, 
869.8599793531309, 823.4765754598694, 677.0599587009542, 711.813545082442, 
860.429985774796, 785.5209524445967, 1234.912441412752, 739.4910065564083], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149999/1000000 [2:41:00<10:57:27, 21.55it/s]global step 150000, trans_decision ep_re 870.8695668792192

{"global_step": 150000, "eval_re": [807.2948558527453, 784.3835104768864, 
811.4109156836371, 814.775889737699, 806.4180558461406, 772.2938313676617, 
553.0366243082749, 987.9428304644208, 1406.3220625368474, 964.8170925178785], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159999/1000000 [2:51:50<10:48:31, 21.59it/s]global step 160000, trans_decision ep_re 735.0975136623852

{"global_step": 160000, "eval_re": [752.4264338543245, 1149.0313635520072, 
756.5541121477246, 677.7320853440444, 735.4672199187656, 568.6569406135445, 
726.2211783152685, 449.4727609832213, 793.4428086836161, 741.9702332113345], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169997/1000000 [3:02:50<10:43:12, 21.51it/s]global step 170000, trans_decision ep_re 859.0647038221343

{"global_step": 170000, "eval_re": [930.2696503585687, 705.880292481012, 
777.336737507212, 995.2100739712142, 793.8798137513448, 802.1958797937529, 
790.5253090471411, 987.6090700033986, 848.4968158903683, 959.2433954173308], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179998/1000000 [3:13:50<10:31:24, 21.64it/s]global step 180000, trans_decision ep_re 1125.4144317962248

{"global_step": 180000, "eval_re": [1355.0470406093775, 1457.9078513184622, 
1069.1529862495925, 1379.1519391747786, 1579.1403477328968, 690.8816408409901, 
707.8775014985025, 907.4518566593631, 628.041168351082, 1479.4919855272012], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189999/1000000 [3:24:40<10:25:31, 21.58it/s]global step 190000, trans_decision ep_re 872.2654372724497

{"global_step": 190000, "eval_re": [706.3931247849929, 951.7665780851104, 
770.5166038173493, 652.8909215421334, 747.3171554182882, 767.0165785235242, 
930.6242537930036, 950.5945834660382, 787.2762493468201, 1458.2583239472376], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199997/1000000 [3:35:40<10:14:39, 21.69it/s]global step 200000, trans_decision ep_re 789.1083601405915

{"global_step": 200000, "eval_re": [738.8978422601447, 805.4343964384877, 
736.4625242220071, 685.8315036737266, 1059.9048482019527, 797.865284880565, 
815.3502761364613, 886.4426637765592, 719.6569381402187, 645.2373236757921], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209999/1000000 [3:46:40<10:08:07, 21.65it/s]global step 210000, trans_decision ep_re 859.220333173158

{"global_step": 210000, "eval_re": [898.1540089392981, 719.0683756966265, 
1459.6053593004701, 967.0794781863457, 776.453630609335, 773.3291714649149, 
672.3181901352672, 836.1362887311861, 710.7940987856525, 779.2647298824837], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219999/1000000 [3:57:30<10:02:01, 21.59it/s]global step 220000, trans_decision ep_re 1041.1229193797285

{"global_step": 220000, "eval_re": [716.2419209335515, 884.4646165127765, 
774.2461319489875, 1493.003338392726, 907.1278812654409, 1051.3245073199234, 
1338.8856276912265, 919.0329051992577, 730.096704980188, 1596.8055595532053], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229998/1000000 [4:08:30<9:45:43, 21.91it/s]global step 230000, trans_decision ep_re 823.1948843709806

{"global_step": 230000, "eval_re": [894.7527763665271, 847.0069874684873, 
688.1594345717938, 792.3687549136805, 761.6038653975403, 831.0772072800494, 
963.36595867823, 738.3904898730134, 853.6879530789969, 861.5354160814876], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239999/1000000 [4:19:20<9:44:04, 21.69it/s]global step 240000, trans_decision ep_re 796.8057986331944

{"global_step": 240000, "eval_re": [999.7859269517342, 865.6135287629663, 
932.6504211107471, 659.5018208027203, 902.1236336347177, 804.8389666558807, 
601.0041911232994, 795.4203405161306, 761.940102789481, 645.1790539842681], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249998/1000000 [4:30:20<9:33:55, 21.78it/s]global step 250000, trans_decision ep_re 908.7885870704646

{"global_step": 250000, "eval_re": [824.1155053447559, 829.4301344273018, 
1397.2322741996, 1105.576166876055, 833.8891422814667, 770.0473479586967, 
905.0235687792646, 776.0466777809592, 718.843814166997, 927.6812388895494], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259999/1000000 [4:41:10<9:34:10, 21.48it/s]global step 260000, trans_decision ep_re 882.9810766456928

{"global_step": 260000, "eval_re": [621.0758747951998, 985.1627024413012, 
911.1377598819981, 903.9412281362868, 775.9223020598821, 963.818794178543, 
1013.7919293185142, 769.8680438026221, 1095.5569028741563, 789.5352289684234], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269999/1000000 [4:52:10<9:23:34, 21.59it/s]global step 270000, trans_decision ep_re 815.708277826471

{"global_step": 270000, "eval_re": [783.5964520756144, 676.3807813078766, 
845.0445758736428, 816.4220154495005, 691.6651782606386, 778.992703233443, 
983.4170754720423, 918.0123384936256, 968.4310218868806, 695.1206362114444], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279999/1000000 [5:03:10<9:14:39, 21.64it/s]global step 280000, trans_decision ep_re 964.68568393451

{"global_step": 280000, "eval_re": [1079.5788180874463, 1041.438156574748, 
957.8511133749221, 1139.0057666225205, 930.7600939375654, 899.841783887024, 
928.1103951194548, 898.5410552468938, 937.1934344606777, 834.536222033849], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289999/1000000 [5:14:00<9:08:31, 21.57it/s]global step 290000, trans_decision ep_re 1025.413404949306

{"global_step": 290000, "eval_re": [830.0150793640319, 1424.8624618915737, 
1192.4728265237413, 736.3452084481239, 1146.539827834497, 991.8572413354754, 
1202.268889557938, 907.779997299122, 933.1104001151904, 888.882117123365], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299999/1000000 [5:25:00<9:03:49, 21.45it/s]global step 300000, trans_decision ep_re 887.999131347428

{"global_step": 300000, "eval_re": [825.5845520830536, 1088.72382425231, 
992.815667834689, 851.8297734457506, 1040.2060857381261, 799.382406903704, 
623.9628110748766, 967.5346750959947, 861.5776366345574, 828.3738804112174], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309998/1000000 [5:35:50<8:42:45, 22.00it/s]global step 310000, trans_decision ep_re 845.4685580667241

{"global_step": 310000, "eval_re": [863.8672724069629, 480.74348965809014, 
921.6412293725865, 1387.0362757158452, 720.8388540875162, 761.6723757391649, 
688.9230053251058, 961.8682496612588, 701.0394251407201, 967.0554035599912], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319998/1000000 [5:46:40<8:35:26, 21.99it/s]global step 320000, trans_decision ep_re 841.353992927546

{"global_step": 320000, "eval_re": [770.8775321002678, 720.1135342506818, 
791.8102749111529, 1060.6106972521775, 1036.2303442245736, 1044.8854802645387, 
669.9183334577906, 949.3722797732288, 695.8921939343728, 673.8292591066756], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329997/1000000 [5:57:30<8:32:11, 21.80it/s]global step 330000, trans_decision ep_re 1125.5197380485336

{"global_step": 330000, "eval_re": [1096.7502227211091, 1071.2666415817991, 
827.4021430951958, 1363.2128961896792, 1247.3585842912219, 1549.7139692471744, 
1658.630252554884, 821.2384229437953, 625.597265879587, 994.0269819808884], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339999/1000000 [6:08:20<8:23:54, 21.83it/s]global step 340000, trans_decision ep_re 939.891808403447

{"global_step": 340000, "eval_re": [973.5788014657857, 820.4206630387264, 
724.2762833332398, 897.9951887400404, 772.1429070414048, 1401.8449565025653, 
1011.4148599934085, 990.2060420855826, 759.3344193084332, 1047.703962525284], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349999/1000000 [6:19:10<8:17:48, 21.76it/s]global step 350000, trans_decision ep_re 1083.6803999591207

{"global_step": 350000, "eval_re": [1081.8930726965075, 1032.856082801793, 
1003.6546004675126, 1230.634532663741, 917.916453241841, 1045.7964016501026, 
827.1288933039942, 1486.5519350466025, 1103.4208495349696, 1106.9511781841409], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359999/1000000 [6:30:00<8:10:02, 21.77it/s]global step 360000, trans_decision ep_re 935.397085770951

{"global_step": 360000, "eval_re": [840.1928877793198, 908.1990144788007, 
881.8233786355747, 918.3352748452262, 1151.484221352641, 877.6877760292289, 
832.0601894101933, 1283.4261315018332, 680.1639431648446, 980.5980405118486], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369999/1000000 [6:41:00<8:03:30, 21.72it/s]global step 370000, trans_decision ep_re 907.5382068905343

{"global_step": 370000, "eval_re": [987.0149217655467, 1020.066681617988, 
760.16769720189, 1127.4469044052626, 1016.7298013747388, 796.6905110127321, 
743.1188987370364, 995.6413767431267, 785.6201099503393, 842.8851660966826], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379998/1000000 [6:51:50<7:50:29, 21.96it/s]global step 380000, trans_decision ep_re 965.2940886027711

{"global_step": 380000, "eval_re": [674.7954347084329, 909.1078678960565, 
1210.8297771668847, 1306.9951102813147, 1155.1218876368412, 1058.317957850458, 
887.5206416019082, 770.1800438099167, 683.9992374771217, 996.0729275987787], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389997/1000000 [7:02:40<7:47:24, 21.75it/s]global step 390000, trans_decision ep_re 1027.4869847630337

{"global_step": 390000, "eval_re": [741.9347677165633, 864.092889116954, 
701.6572794423156, 850.0453749929221, 1041.0847056411094, 1307.499286739137, 
1442.4765228936487, 1208.5122025305684, 639.9774676819139, 1477.5893508752024], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [7:13:30<7:39:49, 21.75it/s]global step 400000, trans_decision ep_re 1037.1024014811942

{"global_step": 400000, "eval_re": [692.2692798455895, 935.7902184809949, 
1388.9173660195002, 1509.330518147337, 737.2570773903237, 999.1274104746833, 
892.046144444414, 1089.52826299746, 915.4373907189522, 1211.320346292684], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409999/1000000 [7:24:20<7:32:18, 21.74it/s]global step 410000, trans_decision ep_re 945.5133411390074

{"global_step": 410000, "eval_re": [949.2066776641793, 733.1692251662754, 
1012.2235230877945, 1189.1202482634976, 620.7081795484211, 951.4256705686137, 
995.6011176717575, 1139.5595778419317, 723.2408299154915, 1140.8783616621122], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419999/1000000 [7:35:10<7:23:34, 21.79it/s]global step 420000, trans_decision ep_re 773.5257181643501

{"global_step": 420000, "eval_re": [823.245862748635, 919.2110069887802, 
596.0627499472572, 893.9084118917614, 700.210949203211, 547.5308548598426, 
664.3724490797459, 835.9360945374063, 931.6913797913169, 823.087422595545], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429999/1000000 [7:46:00<7:15:56, 21.79it/s]global step 430000, trans_decision ep_re 893.7582505501184

{"global_step": 430000, "eval_re": [985.6322553986439, 712.5286686364135, 
1139.433408297044, 733.2624472605607, 1144.0737577933173, 666.2542956759003, 
705.4164047090115, 1141.626228181641, 870.3660393807774, 838.9890001678746], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439999/1000000 [7:57:00<7:09:29, 21.73it/s]global step 440000, trans_decision ep_re 926.1283720803322

{"global_step": 440000, "eval_re": [710.5846074728663, 746.4483623031992, 
1089.3301232673803, 989.6472071721477, 1048.0943596359573, 704.4294831775228, 
807.2610933842203, 779.8332627466701, 1394.2084977682332, 991.4467238751232], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449998/1000000 [8:07:50<6:56:58, 21.98it/s]global step 450000, trans_decision ep_re 832.8295518317178

{"global_step": 450000, "eval_re": [826.0799253059148, 784.7664613062178, 
770.7901869822701, 957.7991981739743, 826.2317093779704, 956.930639987119, 
882.9184442464833, 717.1242000041606, 997.5224461564304, 608.1323067766376], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459997/1000000 [8:18:40<6:53:53, 21.74it/s]global step 460000, trans_decision ep_re 1031.8299513317027

{"global_step": 460000, "eval_re": [895.9398589655708, 892.3940973378219, 
825.1693537250286, 989.707779930988, 1102.4779902605737, 1672.2830124827842, 
803.0260690279682, 1048.4217363747125, 985.9196340529396, 1102.9599811586372], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469997/1000000 [8:29:30<6:45:04, 21.81it/s]global step 470000, trans_decision ep_re 891.2740024933877

{"global_step": 470000, "eval_re": [1063.7768244612516, 1050.5217936300148, 
883.3189508455764, 704.1131270225151, 838.9646986085127, 655.1638381413558, 
952.554910803891, 710.8371391706588, 1140.3975004769904, 913.0912417731113], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479999/1000000 [8:40:20<6:38:29, 21.75it/s]global step 480000, trans_decision ep_re 924.8765836119176

{"global_step": 480000, "eval_re": [1325.9620438852269, 1249.1836593566093, 
773.6688013802977, 967.817953930171, 791.7971610368392, 1011.9859408937028, 
824.9918801641121, 670.1558212733268, 945.0595476110444, 688.1430265878445], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489999/1000000 [8:51:10<6:35:22, 21.50it/s]global step 490000, trans_decision ep_re 808.2133846735495

{"global_step": 490000, "eval_re": [689.8184285521986, 871.3784843394815, 
1019.0735594391168, 884.588016870363, 739.6612952688097, 626.3514565386089, 
447.35345871008013, 913.2443050496481, 784.7084459427393, 1105.9563960244484], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499999/1000000 [9:02:00<6:23:07, 21.75it/s]global step 500000, trans_decision ep_re 875.7750642592464

{"global_step": 500000, "eval_re": [819.1467882566967, 1089.8034573477785, 
590.8570876356823, 729.0058583521065, 608.6430705286298, 774.273034193549, 
1112.4190765626834, 916.3126852260903, 1308.9445479941332, 808.3450364951149], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509999/1000000 [9:12:50<6:14:54, 21.78it/s]global step 510000, trans_decision ep_re 1003.7828532565476

{"global_step": 510000, "eval_re": [1292.3396700837416, 1019.7433541363016, 
920.4284217267865, 937.9583514402824, 755.7477405902123, 1298.7876904496106, 
934.7277599528569, 787.7898776840515, 855.511578377341, 1234.7940881242916], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519999/1000000 [9:23:40<6:08:37, 21.70it/s]global step 520000, trans_decision ep_re 1017.5854695285449

{"global_step": 520000, "eval_re": [885.1591712635033, 949.7489027949857, 
1222.080100889775, 965.68250216402, 866.6325844562244, 842.3780965927018, 
880.2550952682521, 1326.1493131098914, 968.7983336871788, 1268.970595058916], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529998/1000000 [9:34:40<5:56:10, 21.99it/s]global step 530000, trans_decision ep_re 837.4422989756486

{"global_step": 530000, "eval_re": [828.1890924535867, 911.2430588393273, 
826.2245924840593, 743.6773480770654, 877.5842273149348, 919.6823456773035, 
899.7242049935326, 731.1594760679202, 967.0227957654504, 669.9158480833044], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539998/1000000 [9:45:30<5:48:35, 21.99it/s]global step 540000, trans_decision ep_re 970.9624947088953

{"global_step": 540000, "eval_re": [983.0738708298721, 864.6641058171797, 
823.6843832030993, 777.1726477516602, 877.8898068305226, 1232.8382216716418, 
1148.519962051171, 828.2622627378976, 1078.5329661344585, 1094.9867200614503], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549997/1000000 [9:56:20<5:44:05, 21.80it/s]global step 550000, trans_decision ep_re 929.3910756813854

{"global_step": 550000, "eval_re": [1087.000423204931, 935.1963215540089, 
850.2209657205278, 738.5558890630834, 882.0133579755247, 662.2359936963908, 
870.0614863644826, 1152.7764623630187, 1253.41603282795, 862.4338240439364], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559999/1000000 [10:07:10<5:36:59, 21.76it/s]global step 560000, trans_decision ep_re 787.6646287640681

{"global_step": 560000, "eval_re": [762.8177799258401, 732.7692959019494, 
916.8641987497492, 859.5513096020214, 931.6651307210793, 591.8144859583919, 
851.0494550897064, 781.6669427115005, 718.7728632901375, 729.6748256903047], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569999/1000000 [10:18:00<5:28:12, 21.84it/s]global step 570000, trans_decision ep_re 891.5323180852916

{"global_step": 570000, "eval_re": [1189.5449184376498, 943.6962955538336, 
91.41799543763743, 825.1615738195102, 836.7879037531418, 599.9515459137573, 
1235.3684301412995, 838.9449211687638, 1196.5283274737872, 1157.9212691535358], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579999/1000000 [10:28:50<5:20:30, 21.84it/s]global step 580000, trans_decision ep_re 947.9577018124608

{"global_step": 580000, "eval_re": [896.5546980878249, 992.7130904283781, 
1199.7362460727018, 793.833695709413, 857.1008042510103, 860.854722489229, 
910.5902224773819, 881.163023729263, 1096.3679817345783, 990.662533144828], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589999/1000000 [10:39:40<5:12:54, 21.84it/s]global step 590000, trans_decision ep_re 893.7363520538432

{"global_step": 590000, "eval_re": [772.6812685232769, 721.5387731122155, 
1673.3277462039787, 740.8309252744147, 600.7906931760981, 1022.367053771312, 
870.8139483377081, 687.2923705695723, 988.5477673476412, 859.1729742222136], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599999/1000000 [10:50:40<5:06:07, 21.78it/s]global step 600000, trans_decision ep_re 956.241997097061

{"global_step": 600000, "eval_re": [783.3447489174637, 919.150615002271, 
867.5249784338451, 980.6715071889546, 1301.3962009922961, 837.3392273038903, 
714.1896047926906, 954.3142487250243, 1066.1923668151558, 1138.2964727990186], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609999/1000000 [11:01:30<5:00:12, 21.65it/s]global step 610000, trans_decision ep_re 935.7979841009825

{"global_step": 610000, "eval_re": [1021.3497556208376, 954.0689993845326, 
1029.4361632596208, 886.5415059351203, 1049.164645225967, 780.6125410351948, 
1241.4313642157742, 915.9515742137156, 744.9313635528077, 734.4919285662552], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619998/1000000 [11:12:20<4:48:43, 21.94it/s]global step 620000, trans_decision ep_re 892.1747566227002

{"global_step": 620000, "eval_re": [1223.8673563491022, 915.9056235999466, 
1008.2881072869428, 786.2404581806494, 803.6584017914435, 969.0111751361765, 
784.3031267263507, 687.4506241169535, 925.2126603367711, 817.8100327026656], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629997/1000000 [11:23:10<4:44:08, 21.70it/s]global step 630000, trans_decision ep_re 1001.665085598268

{"global_step": 630000, "eval_re": [696.5078182959908, 878.5978294943666, 
1162.8701861951415, 1408.4922600389323, 882.6387522413792, 1104.2178066965457, 
1183.92515063295, 773.4541078555947, 1020.5483960571743, 905.3985484746038], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639999/1000000 [11:34:00<4:35:17, 21.80it/s]global step 640000, trans_decision ep_re 1049.9384610570592

{"global_step": 640000, "eval_re": [892.890603212639, 1091.792196225483, 
1043.7586036462667, 1197.1855758803335, 782.5296052505867, 1620.1575387510454, 
1232.006496674485, 1146.4032166506709, 796.6067385588932, 696.0540357201882], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649999/1000000 [11:44:50<4:27:55, 21.77it/s]global step 650000, trans_decision ep_re 908.3742325835501

{"global_step": 650000, "eval_re": [739.9439213958092, 935.6817925022979, 
755.619888702237, 932.3790053268108, 833.028221673921, 1119.4838522636323, 
1075.6739766905987, 872.0871087540726, 1041.7621565749648, 778.082401951156], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659999/1000000 [11:55:40<4:20:19, 21.77it/s]global step 660000, trans_decision ep_re 1091.8435468171383

{"global_step": 660000, "eval_re": [1099.487857917204, 1331.7085573033867, 
1222.006128770781, 1099.43974426673, 884.9086224973898, 812.4938249286652, 
1384.7140809598195, 988.6391788632833, 917.5076958258168, 1177.529776838308], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669999/1000000 [12:06:40<4:12:02, 21.82it/s]global step 670000, trans_decision ep_re 901.0440543156681

{"global_step": 670000, "eval_re": [30.446544646375457, 886.6287081132, 
997.7465094441382, 1367.4729354306075, 1063.8656801731606, 938.5521149819205, 
767.1081140999345, 1069.7807186518203, 1065.291120902735, 823.5480967127883], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679998/1000000 [12:17:30<4:03:50, 21.87it/s]global step 680000, trans_decision ep_re 989.9397035034075

{"global_step": 680000, "eval_re": [869.6515176009476, 1171.1579509676524, 
1226.938713497901, 1379.4069332722397, 1027.257935385016, 788.2818347128049, 
817.3101628788088, 795.0555144163653, 861.5977711274186, 962.738701174919], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689997/1000000 [12:28:20<3:56:46, 21.82it/s]global step 690000, trans_decision ep_re 974.6783550980829

{"global_step": 690000, "eval_re": [1339.408719440331, 796.0507870706724, 
1020.0607492293825, 929.3013246734067, 1329.2480416988012, 753.4824437660062, 
754.1317183347956, 686.1935185529287, 779.831641558615, 1359.07460665589], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699999/1000000 [12:39:10<3:49:45, 21.76it/s]global step 700000, trans_decision ep_re 1059.5046772362648

{"global_step": 700000, "eval_re": [832.8518178649821, 1021.836526681037, 
725.1375241652084, 772.2657207610483, 808.7598790956289, 1598.1858420789888, 
1151.416591899765, 1560.2109331541762, 1021.7262764344237, 1102.6556602273904], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709999/1000000 [12:50:00<3:41:46, 21.79it/s]global step 710000, trans_decision ep_re 1143.6387693581567

{"global_step": 710000, "eval_re": [858.1324794481973, 1012.8953962671034, 
1414.3349414888316, 1095.1569445297114, 1129.5346868042423, 1572.9492425478595, 
806.0988543060863, 833.3785360724834, 1304.6986941359241, 1409.2079179811255], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719999/1000000 [13:00:50<3:34:58, 21.71it/s]global step 720000, trans_decision ep_re 787.1386273603031

{"global_step": 720000, "eval_re": [961.0028377332602, 753.0933747275201, 
742.4133801729669, 808.1281281632882, 1006.0986359692387, 778.0905762300616, 
654.2666601871366, 729.6645849934886, 781.9093435331132, 656.7187518929569], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729998/1000000 [13:11:50<3:25:12, 21.93it/s]global step 730000, trans_decision ep_re 941.7758858701303

{"global_step": 730000, "eval_re": [1329.4990923465282, 796.6259439090576, 
978.2810532028645, 1229.7298739672806, 956.8762270668252, 783.8185097181054, 
952.8479163887023, 765.9246619389221, 937.8862647118325, 686.2693154511854], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739997/1000000 [13:22:40<3:18:47, 21.80it/s]global step 740000, trans_decision ep_re 840.3094636372873

{"global_step": 740000, "eval_re": [760.69595614161, 800.9549777684373, 
862.9667210966348, 900.9440557683888, 1186.011021298631, 1197.7466971523038, 
681.8058177745239, 719.191330027207, 551.3444942288855, 741.4335651162496], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749999/1000000 [13:33:30<3:11:04, 21.81it/s]global step 750000, trans_decision ep_re 945.2771134509687

{"global_step": 750000, "eval_re": [769.8736558306771, 893.1406853534372, 
1028.1452190275713, 860.737036728686, 1306.9943444568655, 854.3297793937586, 
1037.5522599505848, 785.8207296800765, 734.1735664868711, 1182.0038576011575], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759999/1000000 [13:44:20<3:03:44, 21.77it/s]global step 760000, trans_decision ep_re 909.1334026007456

{"global_step": 760000, "eval_re": [786.1427454508034, 1165.8057124141055, 
1185.9474607178524, 775.7870227683723, 852.207326442172, 879.0163432438316, 
734.4458355923074, 899.1300189390964, 777.4668017293559, 1035.3847587095604], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769998/1000000 [13:55:20<2:54:20, 21.99it/s]global step 770000, trans_decision ep_re 861.661587110156

{"global_step": 770000, "eval_re": [887.2470009554291, 935.7301788290166, 
993.5038519508646, 946.6724243422805, 767.7000792261563, 1125.179198908308, 
786.1975770951385, 751.8239292730943, 578.6764777870013, 843.8851527342703], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779997/1000000 [14:06:10<2:49:12, 21.67it/s]global step 780000, trans_decision ep_re 948.9507589209024

{"global_step": 780000, "eval_re": [956.6970058662718, 768.1382056916021, 
814.3782498520477, 1421.0816794006087, 847.7792856774203, 1084.3950082343265, 
1396.8408873257445, 805.1650117972257, 759.5283840590951, 635.5038713046816], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789999/1000000 [14:17:00<2:40:34, 21.80it/s]global step 790000, trans_decision ep_re 857.2509773873131

{"global_step": 790000, "eval_re": [719.984557248695, 882.7330141312899, 
840.2937587955169, 957.1454873722828, 647.0385078851855, 857.7742500393688, 
850.3448980413225, 896.7414011501838, 1113.1783969075918, 807.2755023016949], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799999/1000000 [14:27:50<2:33:11, 21.76it/s]global step 800000, trans_decision ep_re 830.8983266852953

{"global_step": 800000, "eval_re": [771.6908551000015, 880.3524687147269, 
710.6150306038874, 847.8395355588499, 815.0501085212219, 762.8301650983171, 
967.7961238982446, 729.8590329265229, 964.8860759091385, 858.0638705220435], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809999/1000000 [14:38:50<2:25:42, 21.73it/s]global step 810000, trans_decision ep_re 1063.7054584672678

{"global_step": 810000, "eval_re": [1113.4258168538615, 1028.8499743963475, 
847.5128992323725, 571.9020958524195, 1485.7247268300362, 1569.6040911299967, 
1304.0564617567202, 796.8881224645363, 1173.7572058926091, 745.3331902637791], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819998/1000000 [14:49:40<2:16:23, 21.99it/s]global step 820000, trans_decision ep_re 823.1511928513886

{"global_step": 820000, "eval_re": [867.3589376780985, 776.2857322922723, 
825.5020496192573, 734.4882805153202, 763.5584693577279, 852.2700917843435, 
1125.3309378332358, 949.3226570207279, 676.235847568744, 661.1589248441595], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829999/1000000 [15:00:30<2:10:12, 21.76it/s]global step 830000, trans_decision ep_re 807.2723656182802

{"global_step": 830000, "eval_re": [696.1124469021227, 814.0266581659426, 
967.0364026549199, 736.774234749722, 752.201874830383, 721.9136828273205, 
911.5979971488409, 887.6987078443731, 716.8617336651458, 868.4999173940316], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839999/1000000 [15:11:20<2:02:38, 21.74it/s]global step 840000, trans_decision ep_re 1028.7653966144935

{"global_step": 840000, "eval_re": [993.3063755760096, 908.2899716133178, 
906.9588181238802, 698.8728236607163, 1149.8746672255318, 1626.895580312632, 
1031.3411304578965, 960.1186455468224, 1141.5836299658918, 870.4123236622368], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849999/1000000 [15:22:10<1:54:34, 21.82it/s]global step 850000, trans_decision ep_re 915.6250545534986

{"global_step": 850000, "eval_re": [838.6855261764142, 1386.0225500678396, 
820.1804046960367, 931.1221045986933, 913.0892750483401, 771.326622650061, 
1076.0488000549067, 748.6940820658152, 886.8145371935519, 784.2666429833255], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859999/1000000 [15:33:11<1:47:16, 21.75it/s]global step 860000, trans_decision ep_re 876.1497928091327

{"global_step": 860000, "eval_re": [741.3523603132813, 686.0459987437187, 
886.8271482854275, 915.1318071575554, 758.6134731018128, 641.3861362764774, 
1075.8262377299602, 1052.052971461367, 844.7081372206683, 1159.5536578010585], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869998/1000000 [15:44:01<1:38:35, 21.98it/s]global step 870000, trans_decision ep_re 881.72886740951

{"global_step": 870000, "eval_re": [747.108858794647, 955.7529179473053, 
984.8618494215976, 648.3966088880379, 825.6662236602613, 1361.5309894665575, 
711.4095988463121, 979.6575646818824, 779.4954912431235, 823.4085711453755], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879998/1000000 [15:54:51<1:31:04, 21.96it/s]global step 880000, trans_decision ep_re 785.9089805983529

{"global_step": 880000, "eval_re": [975.5015574019953, 829.8235391608874, 
694.9050199734431, 1074.613792309673, 707.8167081200781, 870.0591817812699, 
656.7319748165374, 687.4926245683469, 598.9199414726842, 763.2254663786131], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889997/1000000 [16:05:41<1:24:01, 21.82it/s]global step 890000, trans_decision ep_re 924.0336623264506

{"global_step": 890000, "eval_re": [966.8502188168914, 900.903394115195, 
824.6228589439643, 1215.7822569101004, 901.7261487244997, 1034.9219306800178, 
735.5241985086434, 659.1126071427784, 889.9105323715638, 1110.9824770508521], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899997/1000000 [16:16:31<1:16:24, 21.81it/s]global step 900000, trans_decision ep_re 853.7136054836686

{"global_step": 900000, "eval_re": [985.2244702830802, 789.4464364129819, 
805.5965054294745, 898.4058474912418, 786.6549701527714, 992.7837407472915, 
805.5555717775278, 735.2230638959284, 737.7361788909254, 1000.5092697554632], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909997/1000000 [16:27:21<1:08:59, 21.74it/s]global step 910000, trans_decision ep_re 886.9926318815457

{"global_step": 910000, "eval_re": [1111.369798399293, 752.4822559192211, 
983.025604225655, 804.0004529749152, 612.9122600811883, 873.6503228870034, 
991.0274128371575, 1209.586918243999, 716.7107298550968, 815.160563391928], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919999/1000000 [16:38:11<1:01:09, 21.80it/s]global step 920000, trans_decision ep_re 912.5671655683434

{"global_step": 920000, "eval_re": [851.4272219651319, 823.0072396236172, 
1037.9660078344666, 1258.6789164538993, 777.8392116489217, 860.0632836741005, 
705.9439356174288, 959.8292419950686, 772.548308864593, 1078.3682880062076], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929999/1000000 [16:49:01<53:33, 21.78it/s]global step 930000, trans_decision ep_re 855.8111419995512

{"global_step": 930000, "eval_re": [841.7470806961643, 747.2761213170093, 
761.1099795177316, 793.5328027909895, 999.4081171260251, 1019.7476813067243, 
582.951495552695, 732.0442663881621, 896.3293131592329, 1183.9645621407788], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939999/1000000 [16:59:51<45:56, 21.77it/s]global step 940000, trans_decision ep_re 918.9827923156885

{"global_step": 940000, "eval_re": [922.6256410183707, 754.3314043800333, 
1098.653218718461, 958.229628195124, 831.4004240897185, 816.5337174699237, 
1101.6903417698468, 922.6431265912405, 936.3559021570194, 847.3645187671464], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949999/1000000 [17:10:41<39:04, 21.32it/s]global step 950000, trans_decision ep_re 808.8034877054497

{"global_step": 950000, "eval_re": [835.7715804172732, 780.6859655150298, 
977.8333162197379, 753.5132052580484, 953.9623706637839, 548.9988368907228, 
1018.5693510656037, 760.651726839304, 773.3951013526973, 684.6534228322955], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959999/1000000 [17:21:41<30:58, 21.53it/s]global step 960000, trans_decision ep_re 791.3266846997637

{"global_step": 960000, "eval_re": [1027.7293234676974, 847.3725440270667, 
970.9504411914163, 802.9314554355387, 838.002007200415, 809.3834155961213, 
788.012527713859, 883.1423479300502, 730.6622123727368, 215.0805720627368], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969998/1000000 [17:32:41<22:49, 21.91it/s]global step 970000, trans_decision ep_re 822.2256709407504

{"global_step": 970000, "eval_re": [937.1181564367113, 752.940311198399, 
898.5921370212253, 814.9388371769412, 698.5118769037337, 858.2321492053993, 
898.0594782689619, 631.9636726273633, 1035.3271821496328, 696.5729084191371], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979999/1000000 [17:43:41<15:25, 21.61it/s]global step 980000, trans_decision ep_re 882.8948107995226

{"global_step": 980000, "eval_re": [1471.593464967497, 787.3841860645487, 
759.0270339330069, 934.1799523156674, 795.898734538058, 808.8914347925898, 
936.162980678719, 690.0385404027671, 743.2109871407417, 902.5607931616299], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989999/1000000 [17:54:31<07:39, 21.76it/s]global step 990000, trans_decision ep_re 838.2170654665294

{"global_step": 990000, "eval_re": [876.333668223098, 995.3324043107909, 
707.7118502860308, 901.0977720808951, 570.4680212951954, 687.3972088539236, 
1125.9619018648837, 888.0087384737186, 781.9767352769235, 847.8823539998361], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999999/1000000 [18:05:31<00:00, 21.58it/s]global step 1000000, trans_decision ep_re 864.2312657833976

{"global_step": 1000000, "eval_re": [1039.9644524675978, 785.7458217972604, 
624.3138991318348, 1242.6567046826192, 761.763056143024, 1042.4974169500774, 
820.9706751969353, 830.8702461067744, 782.4361691910392, 711.0942161668149], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [18:06:10<00:00, 15.34it/s]
