
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.2
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [04:30<10:47:08, 25.50it/s]global step 10000, trans_decision ep_re -312.5465763530305

{"global_step": 10000, "eval_re": [-265.2069486773378, -278.1123864229151, 
-370.72862113429017, -278.5064793269384, -422.4030162857064, -284.1713200616496,
-299.9675038361368, -301.06576072936997, -302.0319927337791, 
-323.2717343221819], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

  2%|▏         | 19999/1000000 [13:50<10:43:55, 25.37it/s]global step 20000, trans_decision ep_re -91.12375417327593

{"global_step": 20000, "eval_re": [-74.74519204265813, -86.09749238420864, 
6.601866204633614, -32.7010302676415, -19.501910682301503, -112.49728513043499, 
-398.07991567418645, -7.726073077032245, -117.48348789021804, 
-69.00702078871133], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

  3%|▎         | 29998/1000000 [23:00<10:24:31, 25.89it/s]global step 30000, trans_decision ep_re 440.0014969232149

{"global_step": 30000, "eval_re": [415.6770175669566, 437.5046087840773, 
471.85937607505207, 554.1929444815156, -246.58293092816862, 319.1677517816664, 
974.7844461204091, 403.4869992968609, 528.4682586408394, 541.4564974129403], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39997/1000000 [32:10<10:28:29, 25.46it/s]global step 40000, trans_decision ep_re 501.81387703838635

{"global_step": 40000, "eval_re": [635.7492708044458, -300.6733707474719, 
576.6709779342286, 604.4397668014452, 592.8358624567701, 557.6831806741329, 
596.5865387335851, 618.3181342454924, 620.9125638940832, 515.615845587152], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49999/1000000 [41:20<10:26:17, 25.28it/s]global step 50000, trans_decision ep_re 403.0397075516016

{"global_step": 50000, "eval_re": [-138.8553064046476, 710.8467000095031, 
879.4140490487447, 597.0084475398106, 507.0363899159101, 555.5089334733506, 
547.1505505772024, 626.6412662978136, -521.9524502117353, 267.59849527006327], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59998/1000000 [50:30<10:07:40, 25.78it/s]global step 60000, trans_decision ep_re 451.3881819512406

{"global_step": 60000, "eval_re": [599.1127315865823, 575.8346782311935, 
816.0646461280429, 620.6522410411828, 656.6214302971669, -391.53605523326553, 
-398.5351236025175, 859.8343121099244, 524.1037959772417, 651.7291629768544], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69999/1000000 [59:40<10:12:09, 25.32it/s]global step 70000, trans_decision ep_re 605.2116745509746

{"global_step": 70000, "eval_re": [778.5140279214431, 630.1796356205919, 
-188.60359265127926, 557.9018604537216, 857.2853443372763, 714.273828338014, 
743.1891565989476, 602.2543878790774, 633.9429747416048, 723.1791222703497], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79999/1000000 [1:09:00<10:03:58, 25.39it/s]global step 80000, trans_decision ep_re 736.1536419219188

{"global_step": 80000, "eval_re": [701.7427398345172, 749.8681635032475, 
732.5634346764862, 808.2770482835172, 704.4729851823608, 616.3242610856641, 
591.0071437297431, 726.2246636263575, 973.6866675092546, 757.3693117880402], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89998/1000000 [1:18:10<9:50:46, 25.67it/s]global step 90000, trans_decision ep_re 676.9527801161805

{"global_step": 90000, "eval_re": [733.3012566413917, 684.8021782609181, 
620.6988347414739, 794.4178767165223, 599.634758770077, 601.6751831012747, 
551.2615754927293, 775.0581696788007, 717.0913350420576, 691.5866327165585], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99997/1000000 [1:27:20<9:49:57, 25.43it/s]global step 100000, trans_decision ep_re 635.4239256244197

{"global_step": 100000, "eval_re": [742.8388722518519, 639.5701064782036, 
606.915713619247, 902.0913439364431, 643.9831994734604, 684.1182224893743, 
891.6220917474121, -241.03643401491064, 788.4758119707319, 695.6603282923821], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109998/1000000 [1:36:30<9:34:10, 25.83it/s]global step 110000, trans_decision ep_re 776.6072898649509

{"global_step": 110000, "eval_re": [843.3762915090847, 776.6550933658272, 
729.1999550063288, 962.1324330583362, 787.2123052931681, 632.457232633701, 
793.1611507955871, 782.3599671327742, 727.6435415355742, 731.8749283191278], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119998/1000000 [1:45:50<9:36:03, 25.46it/s]global step 120000, trans_decision ep_re 811.1173603203597

{"global_step": 120000, "eval_re": [673.6500826654793, 864.5488331140907, 
753.0950660018198, 846.3572233640105, 642.5763070422284, 642.9395764770035, 
965.6036518764151, 686.4720711212506, 937.7552055136465, 1098.175586027653], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129997/1000000 [1:55:00<9:36:42, 25.14it/s]global step 130000, trans_decision ep_re 819.5092332627149

{"global_step": 130000, "eval_re": [864.8207922333024, 943.8240424625947, 
707.7003029196986, 840.32302240013, 1121.659604797576, 613.3352468317814, 
672.3229817112933, 717.1435371796097, 986.7959186364485, 727.1668834547157], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139999/1000000 [2:04:10<9:22:04, 25.50it/s]global step 140000, trans_decision ep_re 872.3825092747577

{"global_step": 140000, "eval_re": [791.9909512436558, 1045.3419069938109, 
746.3330981885592, 598.215236672434, 1230.824102085347, 886.1984955955508, 
915.0879704635914, 876.900004246629, 744.0255609017374, 888.9077663562615], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149999/1000000 [2:13:30<9:23:28, 25.14it/s]global step 150000, trans_decision ep_re 811.7704994723795

{"global_step": 150000, "eval_re": [604.5024170532894, 800.6376936387335, 
665.4629255494665, 876.3081696379354, 731.7821099411711, 869.2371755656076, 
936.9217341039797, 800.3857546452483, 917.8304925494193, 914.6365220389441], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159998/1000000 [2:22:40<9:05:44, 25.65it/s]global step 160000, trans_decision ep_re 772.288442858834

{"global_step": 160000, "eval_re": [775.9452362244082, 734.9127537044358, 
958.4550347960793, 794.33821974811, 607.4431044017175, 866.0561202311457, 
792.5970714343815, 791.4417203981035, 677.6160470593915, 724.0791205905673], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169998/1000000 [2:31:50<8:55:46, 25.82it/s]global step 170000, trans_decision ep_re 798.8948735519277

{"global_step": 170000, "eval_re": [1221.5630760147437, 710.740135479987, 
208.82272392808432, -315.2187524566163, 846.9278108623865, 1199.7168143928627, 
1429.35874181662, 836.2831325925159, 799.1799691885894, 1051.5750837001028], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179998/1000000 [2:41:10<8:47:19, 25.92it/s]global step 180000, trans_decision ep_re 908.0051201011207

{"global_step": 180000, "eval_re": [979.548253275985, 809.0853690076725, 
886.7574030927924, 1150.4614852068812, 1223.186214439986, 945.3816629508924, 
718.6408368320679, 741.1028413365102, 747.0249182091563, 878.8622166592631], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189997/1000000 [2:50:20<8:54:16, 25.27it/s]global step 190000, trans_decision ep_re 995.8952082538442

{"global_step": 190000, "eval_re": [704.0020467817756, 873.552361517217, 
943.4735987488489, 778.1971260760782, 1058.4958581789735, 1143.7510165025808, 
796.5942463455716, 1311.9397021432185, 1404.273318532734, 944.6728077114449], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199999/1000000 [2:59:30<8:41:45, 25.55it/s]global step 200000, trans_decision ep_re 629.9329858913641

{"global_step": 200000, "eval_re": [733.0721285835991, 522.3180130989729, 
715.3317788880266, 867.3265905097795, 624.6085183813441, 34.6350635747222, 
722.4671555612828, 623.8730855235923, 784.3406534366596, 671.3568713556625], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209997/1000000 [3:08:40<8:35:10, 25.56it/s]global step 210000, trans_decision ep_re 719.8492713261389

{"global_step": 210000, "eval_re": [823.7419102169863, 682.5893064359841, 
658.7504975762048, 755.282479383029, 732.8411445016292, 624.1415978311941, 
772.1233837565773, 658.3926213911647, 752.7288201201466, 737.9009520484734], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219998/1000000 [3:17:50<8:22:34, 25.87it/s]global step 220000, trans_decision ep_re 798.0699951072435

{"global_step": 220000, "eval_re": [796.1124775623571, 895.6974928955386, 
744.8772456288604, 714.6489719577683, 783.1471350712065, 921.0875704123745, 
863.4403668364097, 672.5214466556084, 868.1707103671055, 720.9965336852053], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229999/1000000 [3:27:00<8:23:57, 25.47it/s]global step 230000, trans_decision ep_re 841.7654241303608

{"global_step": 230000, "eval_re": [696.2162520567946, 807.8909295565786, 
997.0170622042826, 1027.9904820412182, 841.3078640398752, 625.6489014743073, 
885.2625977581544, 755.9440223290159, 955.6707390034451, 824.7053908399364], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239999/1000000 [3:36:00<8:12:04, 25.74it/s]global step 240000, trans_decision ep_re 761.1369795167203

{"global_step": 240000, "eval_re": [822.1806947855072, 744.2175177977778, 
906.8157969735579, 711.7357677408704, 797.9610912549092, 593.0887949699189, 
744.093217963424, 652.8927975735475, 852.6068250262477, 785.7772910814416], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249999/1000000 [3:45:00<8:02:49, 25.89it/s]global step 250000, trans_decision ep_re 800.0578178885129

{"global_step": 250000, "eval_re": [716.5824059141062, 743.1386769233372, 
729.4105741087551, 635.286024165733, 840.8951943955716, 748.572188349929, 
722.3691823252947, 1280.3173617163895, 812.060436675325, 771.9461343106875], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259998/1000000 [3:54:10<7:53:27, 26.05it/s]global step 260000, trans_decision ep_re 883.4083471089247

{"global_step": 260000, "eval_re": [691.4557784045336, 740.3045625484606, 
978.1151021453506, 1153.0824100083219, 1266.0413355052087, 692.5890819488253, 
757.5333262987145, 799.121103918132, 1021.8035893085408, 734.0371810031581], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269999/1000000 [4:03:20<7:51:13, 25.82it/s]global step 270000, trans_decision ep_re 751.7741513341692

{"global_step": 270000, "eval_re": [925.0925268419589, 1029.8248049637784, 
-217.26310705990238, 696.9782710235796, 1264.6284522073204, 838.7563443782016, 
686.9657284906848, 842.8432801143601, 658.0453561299469, 791.8698562517634], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279998/1000000 [4:12:20<7:38:17, 26.18it/s]global step 280000, trans_decision ep_re 722.0859032586352

{"global_step": 280000, "eval_re": [701.3658901136312, 858.7798315389882, 
778.029272300209, 702.7716658362918, 679.0297484662175, 696.316601277531, 
661.5591542756141, 709.703885698195, 721.8576307506042, 711.4453523290695], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289999/1000000 [4:21:30<7:37:29, 25.87it/s]global step 290000, trans_decision ep_re 874.6181958712156

{"global_step": 290000, "eval_re": [914.2838314077038, 1023.7346579656909, 
635.0978955018078, 719.0335176029836, 953.697682328496, 1203.0446994240617, 
898.2853855996082, 550.6202525893431, 871.921955590233, 976.4620807022271], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299998/1000000 [4:30:30<7:30:05, 25.92it/s]global step 300000, trans_decision ep_re 839.7754534975581

{"global_step": 300000, "eval_re": [731.4994272600682, 760.5066078030316, 
837.3903960555406, 1058.51143247214, 828.1784350886699, 634.8843510069994, 
1278.4563541690075, 722.5506666709106, 759.5383141863769, 786.2385502628375], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309999/1000000 [4:39:40<7:32:50, 25.40it/s]global step 310000, trans_decision ep_re 761.9287646135544

{"global_step": 310000, "eval_re": [875.2157472578695, 673.8820661784051, 
664.7344843571955, 748.7719268034546, 784.3892703967261, 726.0403986898723, 
786.2762679464278, 784.25816131201, 917.170128298026, 658.5491948955573], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319998/1000000 [4:48:40<7:18:11, 25.86it/s]global step 320000, trans_decision ep_re 824.0275763521574

{"global_step": 320000, "eval_re": [734.4100461972048, 645.8085556923811, 
242.52536439133783, 820.9843754846112, 899.7331816304086, 998.1781087057178, 
871.5530213991035, 888.3799126395757, 816.6189232786126, 1322.0842741026202], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329997/1000000 [4:57:50<7:14:32, 25.70it/s]global step 330000, trans_decision ep_re 819.4406775063169

{"global_step": 330000, "eval_re": [983.0771269824817, 818.0027919699351, 
708.2004272432246, 833.9197361769727, 756.000131101325, 977.8002357215918, 
725.8159023739744, 708.4785861950713, 763.5765445583204, 919.5352927402721], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339997/1000000 [5:07:00<7:11:30, 25.49it/s]global step 340000, trans_decision ep_re 1014.7475145638155

{"global_step": 340000, "eval_re": [819.2976608859955, 1100.4659492705446, 
1494.0501442987331, 1210.9026845269348, 971.3377140755684, 827.6209593956758, 
770.6982392366899, 1039.4047748361897, 1023.9739098968529, 889.7231092149681], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349998/1000000 [5:16:10<6:54:57, 26.11it/s]global step 350000, trans_decision ep_re 898.6527291902061

{"global_step": 350000, "eval_re": [810.1819154227319, 769.3745096791989, 
866.1039296314711, 997.1971593622485, 1032.3715550506915, 1115.6625611257557, 
1020.3195922368368, 856.098071140051, 722.2440203579558, 796.9739778951183], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359998/1000000 [5:25:20<6:54:51, 25.71it/s]global step 360000, trans_decision ep_re 850.9488125787435

{"global_step": 360000, "eval_re": [896.497009626605, 712.0630993771647, 
965.3784545051303, 923.4631228183239, 831.8616318639364, 821.4069491601059, 
847.2163092310234, 773.5759124108298, 872.715579259718, 865.3100575345986], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369999/1000000 [5:34:30<6:48:35, 25.70it/s]global step 370000, trans_decision ep_re 866.4477114093988

{"global_step": 370000, "eval_re": [820.234105984346, 813.2716564896167, 
766.725853906579, 873.6554508715434, 876.687507995518, 1036.8039497178145, 
1139.4530380845056, 696.5130435848984, 970.683174103679, 670.4493333554883], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379999/1000000 [5:43:40<6:48:54, 25.27it/s]global step 380000, trans_decision ep_re 701.3745388039185

{"global_step": 380000, "eval_re": [792.8509980206304, 905.6692533602699, 
223.83703270093656, 724.8919532792767, 799.1606980012195, 516.5234698445092, 
614.9768663778481, 659.9442662218892, 713.0932200313655, 1062.79763020124], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389999/1000000 [5:52:50<6:40:08, 25.41it/s]global step 390000, trans_decision ep_re 855.8913438796023

{"global_step": 390000, "eval_re": [1131.8650404459265, 955.1033205142544, 
753.7334913827267, 922.6624922521332, 722.4470604729186, 152.8603638206053, 
984.3544645930264, 1017.5422485013585, 851.5721481694665, 1066.772808643608], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [6:02:00<6:29:56, 25.64it/s]global step 400000, trans_decision ep_re 711.7534402508678

{"global_step": 400000, "eval_re": [542.2745346653464, 795.9187661874766, 
755.7095278593025, 647.8246561137224, 632.5793107593465, 723.0623623644327, 
750.4548910702008, 753.4555631091814, 710.2065557617756, 806.0482346178928], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409999/1000000 [6:11:00<6:25:44, 25.49it/s]global step 410000, trans_decision ep_re 883.1164937977517

{"global_step": 410000, "eval_re": [835.5902321619102, 967.0039912473012, 
1008.5075461012538, 1004.8323828821831, 869.3189537771316, 942.9268027072673, 
872.0711361966178, 755.4164857545236, 902.4967418407052, 673.000665308624], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419997/1000000 [6:20:10<6:21:21, 25.35it/s]global step 420000, trans_decision ep_re 926.4749952861681

{"global_step": 420000, "eval_re": [1004.3788486732773, 849.4045633668137, 
824.3686907713773, 722.5174776435045, 1200.386485829985, 897.2327798035155, 
821.9603008128615, 1087.8363475652975, 909.7925757194871, 946.8718826755616], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429998/1000000 [6:29:20<6:06:31, 25.92it/s]global step 430000, trans_decision ep_re 888.6558753582452

{"global_step": 430000, "eval_re": [1063.1010109000213, 1131.875036422252, 
811.9261973074248, 870.579408983656, 820.0052050708363, 727.6915406865103, 
808.0121276801248, 1229.4809108982915, 645.8898593286353, 777.9974563046995], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439998/1000000 [6:38:40<6:05:55, 25.51it/s]global step 440000, trans_decision ep_re 780.7130605161194

{"global_step": 440000, "eval_re": [795.9782346148114, 761.0545911574162, 
863.0590569238803, 754.3698696651585, 598.1747700296032, 864.2203299773901, 
918.8718660014819, 715.2728851238932, 771.7168765156238, 764.412125151935], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449998/1000000 [6:47:50<5:56:11, 25.74it/s]global step 450000, trans_decision ep_re 830.2957253797573

{"global_step": 450000, "eval_re": [851.8451711803649, 812.3331841373413, 
881.3755524087966, 1013.3400871799454, 786.3794401527437, 666.6954921661517, 
734.4956624351512, 826.5946983454406, 1031.9354959098835, 697.9624698817535], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459998/1000000 [6:57:00<5:47:35, 25.89it/s]global step 460000, trans_decision ep_re 922.6451517578191

{"global_step": 460000, "eval_re": [794.4513363374338, 900.7648479972758, 
882.8823183896685, 1249.3730900694668, 845.5780072528, 818.784955527018, 
944.7081391481539, 995.9669321147973, 835.8693943368003, 958.0724964047763], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469999/1000000 [7:06:10<5:44:59, 25.60it/s]global step 470000, trans_decision ep_re 739.801184991987

{"global_step": 470000, "eval_re": [890.6137164100768, 717.0764767092907, 
378.4908980952617, 815.2326978084867, 873.1551717718463, 767.730079511858, 
880.2545926954906, 779.5342402108289, 643.5560913996103, 652.3678853071209], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479998/1000000 [7:15:20<5:39:37, 25.52it/s]global step 480000, trans_decision ep_re 934.3742652475318

{"global_step": 480000, "eval_re": [970.5169524877175, 795.3498160791399, 
1032.3688143274908, 769.1894328891053, 782.5609197973627, 776.27467008113, 
793.6385527159075, 1170.0498165665842, 758.0681871174436, 1495.7254904134352], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489999/1000000 [7:24:30<5:33:18, 25.50it/s]global step 490000, trans_decision ep_re 807.1405667044559

{"global_step": 490000, "eval_re": [695.1003071148616, 1375.791890714577, 
909.8512413714163, 689.5835806486928, 765.1400870347927, 711.3373084625222, 
449.9791175082426, 756.749719461691, 687.4523989508757, 1030.4200157768867], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499998/1000000 [7:33:50<5:29:03, 25.33it/s]global step 500000, trans_decision ep_re 831.0370149471852

{"global_step": 500000, "eval_re": [942.566231846994, 796.7125431650284, 
705.5301266290492, 1009.6174216367606, 1037.119609014902, 753.8408865639368, 
610.61929828401, 831.741541840848, 753.2472536356571, 869.3752368546665], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509997/1000000 [7:43:00<5:20:32, 25.48it/s]global step 510000, trans_decision ep_re 742.8814822874564

{"global_step": 510000, "eval_re": [835.4003970635084, 1069.881989770262, 
634.1594759284854, 741.0687614432564, 682.8506515213978, 703.1907087315617, 
819.8601933468175, 562.9722028872009, 724.6069617788175, 654.8234804032555], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519999/1000000 [7:52:10<5:17:31, 25.19it/s]global step 520000, trans_decision ep_re 802.4543177900084

{"global_step": 520000, "eval_re": [769.8986407054736, 820.013980877807, 
865.7494349329311, 790.6626064359532, 767.7980687900814, 961.3507678853337, 
738.7796762558801, 875.2357533888201, 694.9029234077342, 740.1513252200699], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529999/1000000 [8:01:20<5:08:30, 25.39it/s]global step 530000, trans_decision ep_re 790.0653320305007

{"global_step": 530000, "eval_re": [681.9298776529888, 775.4885314663114, 
751.8323874625521, 819.1801115849682, 742.918216779219, 809.3366402454367, 
791.6134464997259, 980.2902895928498, 928.8242552959514, 619.2395637250036], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539998/1000000 [8:10:30<4:58:46, 25.66it/s]global step 540000, trans_decision ep_re 854.6786653912192

{"global_step": 540000, "eval_re": [609.5503524244908, 1057.5441870912234, 
813.5147291957765, 986.4115989650675, 663.629414412233, 910.4152332277234, 
889.6080945346254, 876.9911861010331, 923.1919157859431, 815.929942174076], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549999/1000000 [8:19:50<4:56:53, 25.26it/s]global step 550000, trans_decision ep_re 793.6024904709627

{"global_step": 550000, "eval_re": [754.8113275930501, 1058.3650184268006, 
809.4878152633157, 1005.2968310513019, 241.0167401052047, 608.4262459622765, 
831.862214751664, 817.7362173309022, 929.0226794246594, 879.9998148004529], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559998/1000000 [8:29:00<4:44:34, 25.77it/s]global step 560000, trans_decision ep_re 850.4801503207138

{"global_step": 560000, "eval_re": [906.218760851325, 810.2929103499376, 
1167.6801469797933, 697.4006425956154, 693.2306103075465, 770.6103171622339, 
775.6674916386372, 1096.946891813679, 785.7457617401964, 801.0079697681734], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569999/1000000 [8:38:10<4:40:03, 25.59it/s]global step 570000, trans_decision ep_re 863.8872743744708

{"global_step": 570000, "eval_re": [1071.301813966468, 708.2413256711119, 
920.7548887902026, 1079.5484035490997, 786.5217317881398, 754.5613532034789, 
818.7338349452393, 892.3667965167386, 744.7244899739075, 862.1181053403229], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579999/1000000 [8:47:10<4:33:24, 25.60it/s]global step 580000, trans_decision ep_re 795.814632543905

{"global_step": 580000, "eval_re": [524.7293023316422, 880.6728888302399, 
816.9318709818697, 779.9332601491462, 821.9157907130428, 918.3129740334667, 
871.4066211492981, 850.1671206250799, 782.2752833257539, 711.8012132995099], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589999/1000000 [8:56:20<4:27:00, 25.59it/s]global step 590000, trans_decision ep_re 817.0322880005294

{"global_step": 590000, "eval_re": [906.4293123259883, 779.3791253562478, 
769.740358809321, 837.2681201486287, 721.135876172542, 738.7406392294313, 
794.9084310350049, 1025.5806724638903, 836.0339658974893, 761.1063785667503], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599998/1000000 [9:05:30<4:17:09, 25.92it/s]global step 600000, trans_decision ep_re 838.9764914928049

{"global_step": 600000, "eval_re": [826.336548908044, 810.2271357505134, 
615.2992718907612, 749.369252909732, 1008.0159005351123, 701.4180382373183, 
748.3708991716151, 870.9405970316249, 966.2704823146097, 1093.5167881787186], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609999/1000000 [9:14:40<4:13:16, 25.66it/s]global step 610000, trans_decision ep_re 898.573196962711

{"global_step": 610000, "eval_re": [1209.7220782538209, 1050.7109964067893, 
755.1251253001216, 802.6903685408296, 1048.6557922616594, 726.7979987013975, 
999.2434112804393, 661.1273703378432, 857.5096767640681, 874.1491517801398], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619999/1000000 [9:23:50<4:06:30, 25.69it/s]global step 620000, trans_decision ep_re 855.443751387423

{"global_step": 620000, "eval_re": [823.5462632893621, 808.0990961578196, 
824.1812691083159, 772.7162147584629, 978.1019507181879, 892.0108991776622, 
804.7489196531179, 832.8904946511791, 868.1070974293542, 950.0353089307683], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629999/1000000 [9:32:50<4:01:57, 25.49it/s]global step 630000, trans_decision ep_re 662.6484786606077

{"global_step": 630000, "eval_re": [693.7614903715045, 683.4944286832679, 
630.2396532238578, 741.7528285825318, 672.5045744253619, 567.2652304383059, 
767.361899451662, 474.25983381653185, 481.8087785659179, 914.0360690471352], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639998/1000000 [9:42:00<3:51:58, 25.86it/s]global step 640000, trans_decision ep_re 825.5541077868347

{"global_step": 640000, "eval_re": [885.8092710677901, 897.1955955832551, 
761.3436483769595, 757.3756258294396, 1024.6730753121667, 726.429371684008, 
800.2997177255315, 749.1521352254119, 880.6775847658093, 772.5850522979749], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649999/1000000 [9:51:00<3:46:22, 25.77it/s]global step 650000, trans_decision ep_re 830.8356429176793

{"global_step": 650000, "eval_re": [875.6428250860154, 1035.9160244288328, 
788.4471016626259, 615.8887844200947, 763.423988420913, 847.8931046116934, 
693.794983147813, 932.2303913971012, 905.2646964265882, 849.8545295751147], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659997/1000000 [10:00:10<3:40:13, 25.73it/s]global step 660000, trans_decision ep_re 844.4176202693864

{"global_step": 660000, "eval_re": [708.1990522855805, 745.4031364075271, 
851.379322099971, 715.9049508597884, 795.4352425211343, 978.4936781633742, 
718.429181403872, 824.9422092681957, 1162.0700325486434, 943.919397135776], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669997/1000000 [10:09:10<3:33:58, 25.70it/s]global step 670000, trans_decision ep_re 926.3762222701389

{"global_step": 670000, "eval_re": [840.0498085230107, 905.7863601952336, 
1194.704461352706, 743.773936808215, 1388.3042512186212, 779.5888389718446, 
1067.9682686900896, 845.4418087189478, 919.5202114740445, 578.6242767486766], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679999/1000000 [10:18:20<3:26:33, 25.82it/s]global step 680000, trans_decision ep_re 975.8759064499898

{"global_step": 680000, "eval_re": [1183.3587929651458, 1219.3234201693542, 
863.3613526309363, 1285.4473200565808, 958.2049681834344, 861.8290853904259, 
842.6266356863239, 773.0527949511527, 783.2663253830484, 988.2883690834957], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689998/1000000 [10:27:30<3:18:20, 26.05it/s]global step 690000, trans_decision ep_re 860.0965994345518

{"global_step": 690000, "eval_re": [767.1560358045726, 842.6308653927317, 
1042.3748836755735, 975.9542470334437, 870.6410909974823, 436.49444965629704, 
926.6732981896268, 819.8902278140376, 1082.775120180791, 836.3757756009593], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699997/1000000 [10:36:30<3:13:36, 25.82it/s]global step 700000, trans_decision ep_re 891.4460588835743

{"global_step": 700000, "eval_re": [858.0276497670155, 916.1896405077957, 
781.823881756353, 877.2825148877852, 896.4339150613348, 839.1845528505477, 
1137.4832344843924, 907.8763340368314, 843.6218022174556, 856.5370632662331], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709998/1000000 [10:45:40<3:05:02, 26.12it/s]global step 710000, trans_decision ep_re 719.803189220497

{"global_step": 710000, "eval_re": [751.7715680349834, 687.6953267711485, 
516.2972238809097, 753.5656019355451, 857.5307412424588, 612.0994464492062, 
811.9512072626791, 844.6133069152492, 670.5091095022625, 691.9983602105267], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719999/1000000 [10:54:50<3:01:48, 25.67it/s]global step 720000, trans_decision ep_re 754.4516262795532

{"global_step": 720000, "eval_re": [781.4597645039286, 682.7262775715892, 
846.8477242875614, 760.1857918655171, 899.819442482329, 1250.1181069504405, 
649.7679967727631, -229.7634757601692, 867.9632499800796, 1035.3913841414926], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729998/1000000 [11:03:50<2:50:22, 26.41it/s]global step 730000, trans_decision ep_re 1049.2781098546118

{"global_step": 730000, "eval_re": [1060.2520982292958, 886.3535036586487, 
1147.7554753006737, 1132.3331537195677, 765.858449161672, 1194.270657757435, 
840.212843484671, 1026.3025521588506, 1346.8764616013082, 1092.5659034739938], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739997/1000000 [11:12:50<2:45:42, 26.15it/s]global step 740000, trans_decision ep_re 913.0228118412933

{"global_step": 740000, "eval_re": [861.1202866273001, 1003.7843034545095, 
811.9182597292445, 772.2692746746799, 964.1504625761896, 1100.268343996378, 
815.699269476323, 910.2392118612877, 1081.176931398621, 809.6017746184015], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749998/1000000 [11:21:50<2:38:12, 26.34it/s]global step 750000, trans_decision ep_re 952.7320602565376

{"global_step": 750000, "eval_re": [632.3089787586091, 751.1256694168144, 
1209.1867226266738, 1063.4606455855812, 945.3948275824773, 844.471183812791, 
1023.7570102583336, 780.9373776099163, 1124.9273192664646, 1151.7508676477162], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759999/1000000 [11:30:50<2:33:40, 26.03it/s]global step 760000, trans_decision ep_re 774.293191286028

{"global_step": 760000, "eval_re": [511.13108700936345, 646.9449766764773, 
750.6887027011464, 917.6071164700924, 848.1952404299183, 743.7614682863145, 
796.5977773414417, 754.823070616809, 788.3994998201706, 984.7829735085475], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769999/1000000 [11:39:50<2:27:16, 26.03it/s]global step 770000, trans_decision ep_re 947.9464394125138

{"global_step": 770000, "eval_re": [752.1039118830486, 845.5742967873624, 
1132.3371785318914, 779.5964959466478, 1261.2692506549927, 1183.2038103804784, 
751.336494768625, 813.0171076223603, 1045.0076485481886, 916.0181990015434], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779998/1000000 [11:49:00<2:19:18, 26.32it/s]global step 780000, trans_decision ep_re 807.3270561368571

{"global_step": 780000, "eval_re": [770.4039823898164, 766.406593782945, 
840.2115716964126, 773.9128571544626, 1001.6534133331829, 734.5040993114462, 
901.0037640317926, 748.0031796767876, 715.70939117878, 821.4617088129464], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789997/1000000 [11:58:00<2:15:43, 25.79it/s]global step 790000, trans_decision ep_re 924.5469434260158

{"global_step": 790000, "eval_re": [1077.3064175686172, 1119.6689737618035, 
837.9161691956919, 969.3103249684829, 875.5831382408695, 872.1706149565306, 
845.6901846380665, 748.8166004102196, 1000.9344221035909, 898.072588416286], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799999/1000000 [12:07:00<2:07:54, 26.06it/s]global step 800000, trans_decision ep_re 946.1395542340537

{"global_step": 800000, "eval_re": [855.4012174448852, 814.4778172341468, 
805.2274531821482, 925.0209785924101, 1231.0499776616984, 1104.5453227920827, 
916.0113325135628, 756.1491817856602, 926.6652136088788, 1126.8470475250629], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809999/1000000 [12:16:00<2:02:05, 25.94it/s]global step 810000, trans_decision ep_re 876.8536067823882

{"global_step": 810000, "eval_re": [775.5276973644615, 852.214289483417, 
789.0742488475497, 1107.4347554467201, 685.8927412706023, 974.5791289608269, 
1114.867864127199, 825.723888566293, 943.3133105763195, 699.9081431804916], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819998/1000000 [12:25:00<1:53:11, 26.51it/s]global step 820000, trans_decision ep_re 868.1194398489688

{"global_step": 820000, "eval_re": [1023.8807757599991, 910.9002276455155, 
965.9793292342733, 872.2583095790263, 709.7622443474855, 689.1221308922112, 
802.3985764402006, 921.2847948392753, 830.0606831811765, 955.5473265705238], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829997/1000000 [12:34:00<1:50:03, 25.74it/s]global step 830000, trans_decision ep_re 850.2559005886493

{"global_step": 830000, "eval_re": [751.7376856039551, 843.7045373883043, 
700.3035503167173, 772.0389663633487, 881.4144086735666, 694.4028203476555, 
763.4546734105212, 1161.946113224974, 1050.804662122057, 882.7515884353918], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839999/1000000 [12:43:00<1:42:31, 26.01it/s]global step 840000, trans_decision ep_re 960.5680608699237

{"global_step": 840000, "eval_re": [872.5339547888889, 859.1474788436046, 
1327.3060233287636, 864.9600471539823, 1143.720477082361, 1028.8783311448863, 
763.559628010868, 992.3615535228461, 575.5987881360239, 1177.6143266870129], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849997/1000000 [12:52:00<1:35:44, 26.11it/s]global step 850000, trans_decision ep_re 979.7151238474756

{"global_step": 850000, "eval_re": [1133.3094695289515, 734.3498246308161, 
950.793380731371, 829.7793936751787, 1210.1997757421675, 753.6381518969558, 
1199.8285766452054, 1196.6687167504535, 844.6575920022899, 943.9263568713664], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859999/1000000 [13:01:00<1:29:28, 26.08it/s]global step 860000, trans_decision ep_re 999.7331836395686

{"global_step": 860000, "eval_re": [1057.9238242675426, 892.4646607341324, 
794.9772565264848, 1320.8892571362612, 835.9232685901338, 927.9154308559055, 
1126.3686947925314, 1113.4761625041272, 786.9851365638167, 1140.4081444247506], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869999/1000000 [13:10:00<1:23:40, 25.89it/s]global step 870000, trans_decision ep_re 888.7115033958065

{"global_step": 870000, "eval_re": [787.3446182332268, 899.8152482640452, 
1040.2700966671584, 788.6548887989654, 897.5775518224812, 918.1648389320451, 
994.5342113330172, 762.2863121543255, 996.7014440036539, 801.7658237491463], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879998/1000000 [13:19:10<1:16:06, 26.28it/s]global step 880000, trans_decision ep_re 992.5286459894185

{"global_step": 880000, "eval_re": [1150.5146805937086, 790.4254541297795, 
1109.4979866345257, 1166.9622859951921, 1050.4972219698436, 706.5188136636898, 
863.2448106262331, 741.7193074297197, 1085.3143833907939, 1260.5915154606982], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889997/1000000 [13:28:10<1:10:49, 25.89it/s]global step 890000, trans_decision ep_re 940.8972804745451

{"global_step": 890000, "eval_re": [813.6786703161819, 779.8605186588298, 
908.7677429274113, 859.8757358908059, 987.519234201064, 862.1958389436684, 
948.189939609655, 1372.9956460565147, 988.6326608266819, 887.2568173146376], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899998/1000000 [13:37:10<1:04:18, 25.92it/s]global step 900000, trans_decision ep_re 818.3748262100459

{"global_step": 900000, "eval_re": [854.2065248829123, 879.6902776755524, 
826.2825559908888, 847.2932612146681, 756.3856606256851, 859.0361963390801, 
612.7307241383435, 883.0557125035627, 624.8298151288186, 1040.2375336009482], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909999/1000000 [13:46:20<58:25, 25.67it/s]global step 910000, trans_decision ep_re 841.4615099728087

{"global_step": 910000, "eval_re": [748.2827953084462, 703.8967646218281, 
915.3177956881613, 665.797531035383, 803.8550651956447, 652.0872921436222, 
1478.4738826713267, 839.5903185824617, 798.2342366126275, 809.079417868585], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919998/1000000 [13:55:20<51:09, 26.06it/s]global step 920000, trans_decision ep_re 976.4151863804424

{"global_step": 920000, "eval_re": [904.2463090419224, 754.6761434003461, 
1070.412660727001, 892.494638241807, 1336.1028030812504, 889.7040285642598, 
1074.9983171583424, 940.3131987043362, 975.5931953553687, 925.6105695297902], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929999/1000000 [14:04:30<45:19, 25.74it/s]global step 930000, trans_decision ep_re 774.3384675365593

{"global_step": 930000, "eval_re": [969.8647438789043, 611.6649601525138, 
883.1241986036724, 730.6558956546022, 644.7270328098599, 718.6190036036256, 
581.3354551399138, 654.5806819993538, 1159.1226410495908, 789.6900624735558], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939998/1000000 [14:13:30<38:25, 26.02it/s]global step 940000, trans_decision ep_re 704.1137489965057

{"global_step": 940000, "eval_re": [776.7100134786898, 817.6701375733129, 
828.6431999974635, 883.5282959125194, -252.59457911567102, 935.2610722514032, 
811.5293078927376, 737.3643672566242, 738.8227696393348, 764.2029050786432], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949998/1000000 [14:22:40<31:59, 26.05it/s]global step 950000, trans_decision ep_re 903.7873314495948

{"global_step": 950000, "eval_re": [780.0460010851726, 1131.0526330037371, 
873.602649411363, 1002.6647386877146, 799.6414344654004, 731.2030873354258, 
1228.926088752294, 753.424855513963, 893.3369538933665, 843.9748723475117], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959998/1000000 [14:31:40<25:33, 26.08it/s]global step 960000, trans_decision ep_re 861.4866970875801

{"global_step": 960000, "eval_re": [1383.0047350080708, 720.3407631603608, 
777.7118180652427, 881.9783287937632, 795.5794914384912, 740.5408179537881, 
682.4568750224041, 1112.750936236279, 825.9679571510336, 694.535248046368], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969998/1000000 [14:40:50<19:09, 26.10it/s]global step 970000, trans_decision ep_re 993.507950509647

{"global_step": 970000, "eval_re": [1014.0726884165078, 1039.1038023503259, 
800.6234864611131, 994.1994002780907, 892.1143809581556, 1256.4743562016615, 
935.9223748493607, 1242.212752589886, 912.5872352234057, 847.7690277679628], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979998/1000000 [14:49:50<12:49, 26.00it/s]global step 980000, trans_decision ep_re 932.881883742326

{"global_step": 980000, "eval_re": [755.059926013072, 1008.8768260734341, 
907.0857849993025, 967.9333643150177, 788.8239448718854, 1161.525374447125, 
1087.4463640379636, 950.1288372923034, 719.7484047792927, 982.1900105938645], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989998/1000000 [14:59:00<06:22, 26.14it/s]global step 990000, trans_decision ep_re 760.7776276701693

{"global_step": 990000, "eval_re": [127.38222657273653, 750.6995923493209, 
798.148128181692, 929.7093547108817, 1041.3931143789816, 672.0177242744934, 
919.4724305486837, 954.6507584204085, 849.6794066912827, 564.6235405732125], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999997/1000000 [15:08:00<00:00, 25.78it/s]global step 1000000, trans_decision ep_re 846.1331593357502

{"global_step": 1000000, "eval_re": [762.9582534300872, 882.5624663390181, 
935.5718917610677, 901.8776151271339, 872.7645308033901, 814.5462112506615, 
868.639297014287, 794.6062953295097, 837.1489522807329, 790.6560800216122], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [15:08:34<00:00, 18.34it/s]
