
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.25
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [04:21<10:51:26, 25.33it/s]global step 10000, trans_decision ep_re 8.707748151656254

{"global_step": 10000, "eval_re": [49.00362301972377, 6.494319462365251, 
3.235519624892012, -2.579240544565572, -0.18776083333360993, 1.4689064303832848,
1.7468080623547593, 8.129423728303372, 6.305264327129846, 13.46061823930943], 
"eval_len": [67, 23, 18, 17, 11, 19, 21, 21, 16, 29]}

  2%|▏         | 19999/1000000 [13:20<10:52:05, 25.05it/s]global step 20000, trans_decision ep_re 5.026148935240134

{"global_step": 20000, "eval_re": [-1.4019251646859616, 3.1508501393392763, 
4.026519948842353, 5.832757167608048, 1.041721995403615, 1.4728039147240866, 
28.760687007954804, -0.8451911408776415, 2.552243604139638, 5.671021879953122], 
"eval_len": [12, 16, 23, 31, 22, 14, 37, 26, 21, 30]}

  3%|▎         | 29997/1000000 [22:10<10:45:53, 25.03it/s]global step 30000, trans_decision ep_re 22.919963226939032

{"global_step": 30000, "eval_re": [0.6685051124005814, 13.487429216324895, 
9.54822481646756, 2.1487560763478903, 0.5395421062144753, 8.639942994601325, 
10.472637161688384, 1.839914136473471, 1.0239114571330978, 180.83076919173865], 
"eval_len": [11, 35, 22, 19, 19, 22, 26, 15, 12, 130]}

  4%|▍         | 39997/1000000 [31:00<10:34:34, 25.21it/s]global step 40000, trans_decision ep_re 6.039250027334284

{"global_step": 40000, "eval_re": [0.41459707525501577, 1.5732376167889761, 
2.200204546827607, 1.057794572121039, 8.566469634711627, 25.142319505497667, 
14.881702477492512, -2.7567828591405688, 1.452511028390039, 7.8604466753989355],
"eval_len": [13, 23, 18, 14, 26, 35, 33, 20, 12, 31]}

  5%|▍         | 49997/1000000 [39:50<10:39:26, 24.76it/s]global step 50000, trans_decision ep_re 6.591489046218169

{"global_step": 50000, "eval_re": [5.099279990466646, 6.3761249852625035, 
22.839122686698502, -1.897590905004315, 8.374368429319022, 10.202317114859456, 
0.8384346406781452, 3.5528018282749754, 0.9966135299448444, 9.533418161681908], 
"eval_len": [17, 23, 33, 10, 26, 27, 16, 23, 15, 27]}

  6%|▌         | 59998/1000000 [48:25<10:14:45, 25.48it/s]global step 60000, trans_decision ep_re 3.610576336282125

{"global_step": 60000, "eval_re": [6.9820961329532185, -1.0300022307430854, 
4.957830256471103, 2.5078279618500217, -1.0172072543987372, -1.976908831950129, 
8.777410184576118, -0.9815943746743956, 11.349014855250768, 6.537296663486373], 
"eval_len": [17, 16, 22, 28, 25, 11, 24, 25, 30, 29]}

  7%|▋         | 69998/1000000 [57:15<10:15:25, 25.19it/s]global step 70000, trans_decision ep_re 19.433684151607896

{"global_step": 70000, "eval_re": [15.601460112269995, 6.704134448960232, 
15.670526877560881, 1.600790500131826, 3.9894978159067875, 12.03126060150701, 
1.0405979551297573, 0.8678349593240263, 2.9456991140675415, 133.88503913122088],
"eval_len": [30, 17, 27, 16, 13, 26, 27, 10, 16, 171]}

  8%|▊         | 79998/1000000 [1:06:04<10:09:38, 25.15it/s]global step 80000, trans_decision ep_re 5.749701793719033

{"global_step": 80000, "eval_re": [20.354148794142034, 3.000216680893135, 
19.871684794538698, -2.451143990952278, -0.8844985235414387, 0.7215421569364036,
11.985792973031149, 3.4910142607782753, -0.29275980857741, 1.7010205999417662], 
"eval_len": [30, 21, 29, 21, 13, 12, 32, 15, 21, 12]}

  9%|▉         | 89998/1000000 [1:14:54<9:51:59, 25.62it/s]global step 90000, trans_decision ep_re 5.568831708908291

{"global_step": 90000, "eval_re": [4.051842575877359, 14.638250928537087, 
-2.6818942094928846, 4.637837514150293, 7.681141884881078, 7.302244744045531, 
3.4169899621379582, 8.642756819032044, 5.027469701050382, 2.971677168864055], 
"eval_len": [23, 31, 25, 17, 18, 29, 13, 22, 19, 26]}

 10%|▉         | 99999/1000000 [1:23:50<9:49:27, 25.45it/s]global step 100000, trans_decision ep_re 4.640764406409861

{"global_step": 100000, "eval_re": [2.2951112406965124, 12.294942503972578, 
5.114017184162682, 2.1954309780869132, 5.2149202815982125, 6.803385654865128, 
-0.6493183367384363, 2.798513285809847, 3.957415608738711, 6.383225662906459], 
"eval_len": [17, 28, 21, 25, 14, 17, 11, 16, 17, 21]}

 11%|█         | 109997/1000000 [1:32:25<9:57:44, 24.82it/s]global step 110000, trans_decision ep_re 5.202692331319023

{"global_step": 110000, "eval_re": [3.2713882699679573, 2.777782830926698, 
2.1360850774493154, 4.258601080046221, 6.176709760020295, 18.785259838692593, 
5.672508239667807, 1.6559527329310566, 4.754198100631012, 2.5384373828572686], 
"eval_len": [22, 27, 18, 16, 26, 30, 23, 18, 20, 21]}

 12%|█▏        | 119999/1000000 [1:41:10<9:39:38, 25.30it/s]global step 120000, trans_decision ep_re 7.139916294269622

{"global_step": 120000, "eval_re": [-0.3229701335411744, 3.218252170037485, 
11.527024236309103, 3.1728281405324905, 4.148972281948121, 13.856366200739117, 
4.903838219496987, 8.325354938500814, 21.766589987203698, 0.8029069014695622], 
"eval_len": [10, 28, 26, 18, 20, 29, 21, 31, 46, 14]}

 13%|█▎        | 129997/1000000 [1:49:56<9:36:25, 25.16it/s]global step 130000, trans_decision ep_re 4.578353579781292

{"global_step": 130000, "eval_re": [20.598387864455237, 3.7778706116258376, 
-0.2303230163534125, -1.7270287180439163, 6.304469645784809, 
0.34135157714324726, 4.982571339665972, 3.6853169431600343, 0.45759522822354204,
7.593324322151581], "eval_len": [68, 16, 12, 11, 20, 13, 18, 27, 19, 22]}

 14%|█▍        | 139999/1000000 [1:58:41<9:34:36, 24.94it/s]global step 140000, trans_decision ep_re 9.12259233198601

{"global_step": 140000, "eval_re": [10.888430496236605, 18.262065250140235, 
19.02566417147093, 6.403179175778519, 10.10418960272844, 11.003141082360138, 
5.364655863893369, 4.381273470145918, 0.13914738812844638, 5.6541768189774855], 
"eval_len": [29, 62, 30, 22, 38, 29, 16, 18, 10, 24]}

 15%|█▍        | 149999/1000000 [2:07:40<9:18:24, 25.37it/s]global step 150000, trans_decision ep_re 1.7559210764311686

{"global_step": 150000, "eval_re": [3.1611898004627017, -1.8785872978093119, 
7.98590964816301, -1.8948936971206556, -0.7912412003854055, 6.662152585519318, 
-2.467321287590835, 0.46658025544304504, 6.998415274026951, 
-0.6829933163971316], "eval_len": [14, 22, 19, 14, 12, 27, 20, 15, 18, 12]}

 16%|█▌        | 159998/1000000 [2:16:13<9:07:58, 25.55it/s]global step 160000, trans_decision ep_re 19.295911948630547

{"global_step": 160000, "eval_re": [0.7517116060137203, 8.451973237225165, 
2.556783868419856, 1.9287252436532758, 15.128546796444997, 15.888922990251732, 
4.967440526154326, -2.799486436355898, 140.05933811933548, 6.025163535162825], 
"eval_len": [22, 28, 17, 18, 44, 32, 27, 8, 158, 17]}

 17%|█▋        | 169999/1000000 [2:25:10<9:02:00, 25.52it/s]global step 170000, trans_decision ep_re 9.616007842204985

{"global_step": 170000, "eval_re": [12.58714144034415, 2.7042962927964904, 
1.3740748525822104, 2.574911027598196, 18.364548473647954, 8.546158400296472, 
7.55485059407982, 6.154163457700913, 3.0598745476483753, 33.24005933535527], 
"eval_len": [29, 13, 28, 21, 28, 22, 21, 29, 15, 89]}

 18%|█▊        | 179997/1000000 [2:34:00<9:07:47, 24.95it/s]global step 180000, trans_decision ep_re 55.61727297687905

{"global_step": 180000, "eval_re": [439.5348447763744, 1.5189078000560685, 
2.9019277179738947, 2.1092477822967264, 97.14592431716243, 3.358151085816836, 
1.3613388960801163, -1.8862099163551416, 4.198408615865407, 5.930188693519784], 
"eval_len": [459, 17, 14, 19, 89, 22, 22, 19, 14, 19]}

 19%|█▉        | 189998/1000000 [2:42:33<8:48:02, 25.57it/s]global step 190000, trans_decision ep_re 4.838652448535468

{"global_step": 190000, "eval_re": [2.945351086016707, 3.983884396197622, 
11.638196610727478, -1.074200703307271, 12.150981763190599, 7.929655364021308, 
7.24893773225836, 0.45481401435899704, 4.080676317335337, -0.9717720954444617], 
"eval_len": [19, 27, 22, 12, 26, 32, 18, 12, 16, 27]}

 20%|█▉        | 199999/1000000 [2:51:30<8:51:18, 25.10it/s]global step 200000, trans_decision ep_re 31.19691228701935

{"global_step": 200000, "eval_re": [5.635611286128862, -0.9769271708353994, 
2.796214261490294, 12.359639501349902, 8.169391333862507, 4.042520032025118, 
9.090614281802331, 263.1402694943947, 3.2426173888671013, 4.469172461108154], 
"eval_len": [18, 19, 28, 21, 26, 21, 34, 152, 15, 23]}

 21%|██        | 209997/1000000 [3:00:05<8:42:56, 25.18it/s]global step 210000, trans_decision ep_re 8.759909700578115

{"global_step": 210000, "eval_re": [2.062234174748281, 3.238983181905797, 
1.0504828693765917, 10.719370760081409, 47.260574248697765, 4.240441429744317, 
8.329250853437578, 18.454645665248123, -7.885434976439133, 0.12854879898041668],
"eval_len": [15, 15, 17, 25, 91, 22, 24, 28, 27, 14]}

 22%|██▏       | 219999/1000000 [3:08:50<8:37:50, 25.10it/s]global step 220000, trans_decision ep_re 26.62247038987557

{"global_step": 220000, "eval_re": [180.37369778023535, 2.1304674119729667, 
17.43611524237463, 2.7473801471117225, -5.004294095689796, 53.86450203102153, 
1.9704294212041726, -0.23163123500934452, 3.74574880338528, 9.192288392149173], 
"eval_len": [209, 29, 33, 19, 19, 75, 14, 9, 17, 23]}

 23%|██▎       | 229997/1000000 [3:17:50<8:30:27, 25.14it/s]global step 230000, trans_decision ep_re 38.21851702684998

{"global_step": 230000, "eval_re": [293.8597891859625, 3.022106907342909, 
55.114541934799014, -0.13596058990218593, -0.3433816251753329, 
14.104551698779657, 1.0094944913281683, 4.788772069815147, 13.86643301127239, 
-3.101176815722398], "eval_len": [194, 24, 73, 27, 10, 26, 13, 19, 26, 17]}

 24%|██▍       | 239998/1000000 [3:26:24<8:21:07, 25.28it/s]global step 240000, trans_decision ep_re 6.447815946793962

{"global_step": 240000, "eval_re": [5.910755311253738, 2.9469851208760467, 
3.172103583793466, 1.5318042859786367, 6.326807845552508, 20.923687875876208, 
8.979433197155519, 14.694454228345663, -2.8044497380228184, 2.7965777571306556],
"eval_len": [24, 17, 24, 15, 26, 32, 31, 30, 16, 25]}

 25%|██▍       | 249999/1000000 [3:35:10<8:20:36, 24.97it/s]global step 250000, trans_decision ep_re 8.857625283398683

{"global_step": 250000, "eval_re": [64.88688743696548, 1.872173123627448, 
-1.5829973107148527, -0.7234172041311491, -2.0506362439393797, 
6.440190977743524, 7.244984511117962, 5.247516717377011, 4.557884097504089, 
2.6836667284366844], "eval_len": [88, 12, 23, 18, 25, 18, 29, 21, 20, 24]}

 26%|██▌       | 259997/1000000 [3:44:10<8:05:40, 25.39it/s]global step 260000, trans_decision ep_re 24.733243058733603

{"global_step": 260000, "eval_re": [10.003556671620007, 4.142827252082118, 
1.5561586133627698, -3.007044677547542, 2.6934927349560613, -0.9587117489138443,
173.1504468437389, 4.013789164830948, 1.9474970786290235, 53.79041865457761], 
"eval_len": [20, 15, 18, 26, 24, 14, 153, 20, 28, 83]}

 27%|██▋       | 269998/1000000 [3:52:43<8:03:46, 25.15it/s]global step 270000, trans_decision ep_re 11.811290689224636

{"global_step": 270000, "eval_re": [6.514517267234164, 11.609373268578373, 
-3.1698148176881484, 10.13478434383646, 3.2082910537144285, 10.502360880116711, 
14.360086416137321, -0.3556799154878494, 66.27306475020268, -0.964076354397783],
"eval_len": [27, 26, 21, 29, 13, 29, 37, 18, 78, 25]}

 28%|██▊       | 279999/1000000 [4:01:30<7:59:49, 25.01it/s]global step 280000, trans_decision ep_re 84.61217759026204

{"global_step": 280000, "eval_re": [14.181762147881791, 18.935164659753585, 
3.5746004522233497, 762.854743252062, 9.029805655446355, 11.564971811668567, 
14.346897472900016, 3.4306925203581, 5.1396080246723095, 3.0635299056542777], 
"eval_len": [25, 30, 13, 309, 24, 30, 31, 14, 15, 26]}

 29%|██▉       | 289997/1000000 [4:10:30<7:54:06, 24.96it/s]global step 290000, trans_decision ep_re 5.084925999373063

{"global_step": 290000, "eval_re": [0.80472522354187, -0.27731649077147535, 
10.388155748555448, 3.27376831303324, 0.2908140535278665, 10.72113088644093, 
13.955533052162775, 7.975301339121614, 0.4129262992832695, 3.304221568835088], 
"eval_len": [11, 15, 36, 22, 26, 24, 34, 27, 33, 16]}

 30%|██▉       | 299998/1000000 [4:19:03<7:39:23, 25.40it/s]global step 300000, trans_decision ep_re 4.9382480437305105

{"global_step": 300000, "eval_re": [4.2875069277310605, 14.669437861487323, 
3.1296239470882536, 8.7743377025447, -3.0158749484166156, 4.32361235625919, 
0.5316111095480329, 5.731260826071926, 9.559375203097495, 1.3915894518937353], 
"eval_len": [26, 22, 24, 27, 20, 15, 15, 17, 19, 20]}

 31%|███       | 309999/1000000 [4:28:00<7:33:42, 25.35it/s]global step 310000, trans_decision ep_re 5.090585787721838

{"global_step": 310000, "eval_re": [4.042540578711684, 12.503734987917415, 
7.461612586393642, 6.987940697774188, 3.2592012294071635, 1.9038918586616744, 
5.06451841827434, 2.7746344631692246, 4.333108693471627, 2.574674363437421], 
"eval_len": [16, 45, 26, 21, 14, 17, 20, 16, 17, 19]}

 32%|███▏      | 319997/1000000 [4:36:50<7:29:11, 25.23it/s]global step 320000, trans_decision ep_re 61.52828818968569

{"global_step": 320000, "eval_re": [11.766562441197156, 1.082824024065845, 
7.138194945070847, 1.644592375336117, 66.9467482796369, 8.192949239679487, 
4.464920920378232, 4.211268689224509, 0.9428086397645663, 508.8920123425032], 
"eval_len": [22, 28, 30, 13, 75, 25, 27, 20, 12, 256]}

 33%|███▎      | 329998/1000000 [4:45:23<7:18:44, 25.45it/s]global step 330000, trans_decision ep_re 46.562742922615556

{"global_step": 330000, "eval_re": [9.534815616058182, 0.6867861899713151, 
0.44374265944372726, 4.365618191863657, 0.03892407284166177, 8.003992321375453, 
3.453433944424106, 275.229069979188, 160.6700832937423, 3.2009629572471585], 
"eval_len": [34, 13, 21, 21, 13, 22, 14, 135, 129, 22]}

 34%|███▍      | 339999/1000000 [4:54:10<7:16:24, 25.21it/s]global step 340000, trans_decision ep_re 4.91737412437704

{"global_step": 340000, "eval_re": [-0.1961886736847459, -1.9134394193396276, 
3.630556425423557, 12.044147990884671, 1.0661343501361644, 0.9758599196958655, 
29.45171963716416, -2.766154922404837, 4.873345223560502, 2.0077607123346954], 
"eval_len": [19, 25, 15, 31, 12, 12, 68, 14, 21, 16]}

 35%|███▍      | 349997/1000000 [5:03:10<7:13:55, 24.97it/s]global step 350000, trans_decision ep_re 199.08338955336777

{"global_step": 350000, "eval_re": [3.537837580842603, -0.6353045327764952, 
-1.5637203919583733, 1196.3518866537067, 189.0216108659169, -3.8846607130055912,
3.528602533934427, 581.1223743255679, 7.709441726244099, 15.645827485205617], 
"eval_len": [29, 13, 17, 534, 133, 25, 26, 282, 21, 30]}

 36%|███▌      | 359997/1000000 [5:12:00<7:02:43, 25.23it/s]global step 360000, trans_decision ep_re 17.10528390276715

{"global_step": 360000, "eval_re": [2.047608109277114, 0.7096862106393097, 
-1.5392202691860062, 6.589313935046665, -0.714159515994772, -7.800963583826759, 
2.6793379391304324, 161.18311192972087, 13.170241973276699, -5.272117700411996],
"eval_len": [25, 12, 23, 22, 24, 27, 13, 129, 26, 27]}

 37%|███▋      | 369998/1000000 [5:20:33<6:53:52, 25.37it/s]global step 370000, trans_decision ep_re 68.4467258054034

{"global_step": 370000, "eval_re": [3.0979411390288476, 568.3092932394242, 
2.220225589622483, 2.6296167236796664, -3.179119513072764, 3.823969641166559, 
-0.2705723493036448, 0.9321681973885063, 96.30762826750151, 10.596107118598646],
"eval_len": [12, 246, 28, 20, 10, 27, 21, 21, 75, 37]}

 38%|███▊      | 379999/1000000 [5:29:21<6:53:37, 24.98it/s]global step 380000, trans_decision ep_re 53.47500721017699

{"global_step": 380000, "eval_re": [2.3994318311810225, 3.598586533572782, 
321.3290119741139, 0.9041630252762249, 5.1370444893267635, 0.2140696485946334, 
7.218564200305602, -0.3258235099286105, 191.17924814284433, 3.0957757664832806],
"eval_len": [22, 32, 177, 15, 20, 11, 17, 13, 99, 16]}

 39%|███▉      | 389999/1000000 [5:38:20<6:41:40, 25.31it/s]global step 390000, trans_decision ep_re 3.2429575535762525

{"global_step": 390000, "eval_re": [3.933303804337645, 2.104194539859001, 
-0.9376347617840739, 4.091821102494715, 0.4591462236439366, 0.17738631663385673,
3.1777732693516887, 3.362751267006602, 7.530223346926765, 8.530610427292395], 
"eval_len": [16, 27, 13, 14, 15, 24, 28, 16, 17, 25]}

 40%|███▉      | 399997/1000000 [5:46:55<6:42:52, 24.82it/s]global step 400000, trans_decision ep_re 57.74931310715912

{"global_step": 400000, "eval_re": [-4.56901107973262, 1.9235180006141341, 
3.4067245200224847, 0.9095727954222989, 539.9482789038681, 3.253173923565997, 
31.122352379663077, -2.335697037581572, 3.5119234026585158, 0.3222952630907651],
"eval_len": [24, 22, 17, 10, 240, 23, 54, 15, 16, 19]}

 41%|████      | 409999/1000000 [5:55:42<6:33:22, 25.00it/s]global step 410000, trans_decision ep_re 7.822173620615162

{"global_step": 410000, "eval_re": [1.635992731299074, 18.15912944676136, 
11.167822060018295, 3.297104615277916, 13.42649284713415, 16.084651351700707, 
5.821945297075517, 3.9500129495680296, 2.136141290838517, 2.542443616478041], 
"eval_len": [14, 47, 22, 17, 25, 46, 17, 21, 16, 15]}

 42%|████▏     | 419999/1000000 [6:04:40<6:27:42, 24.93it/s]global step 420000, trans_decision ep_re 3.370672122151128

{"global_step": 420000, "eval_re": [4.958884388463475, 0.12675267846513788, 
-1.080013051091181, -3.3702523595111114, -0.9378392674458416, 
11.992732864740288, 13.470002181266429, 3.6795125303642884, -2.251571038751122, 
7.118512295010924], "eval_len": [25, 17, 14, 24, 17, 26, 36, 16, 11, 29]}

 43%|████▎     | 429999/1000000 [6:13:30<6:16:51, 25.21it/s]global step 430000, trans_decision ep_re 26.186668179818064

{"global_step": 430000, "eval_re": [3.888588759033496, -3.011710182543977, 
202.63682948182213, -4.221488904100831, 8.077702884926508, 4.002515963098329, 
5.583913459622065, 3.2990254664014436, 5.12582263564133, 36.48548223428012], 
"eval_len": [15, 10, 138, 25, 24, 16, 18, 15, 35, 60]}

 44%|████▍     | 439997/1000000 [6:22:20<6:14:11, 24.94it/s]global step 440000, trans_decision ep_re 8.587309207081857

{"global_step": 440000, "eval_re": [0.39316665235146775, 13.3119353454921, 
2.1433894686700414, -1.40799485701679, 5.970326011744235, -0.09059754683305257, 
24.721144354336698, 31.041959247637127, 5.953411445284196, 3.8363519491525526], 
"eval_len": [14, 27, 15, 19, 34, 12, 44, 67, 18, 22]}

 45%|████▍     | 449999/1000000 [6:31:10<6:09:06, 24.83it/s]global step 450000, trans_decision ep_re 57.95289513281027

{"global_step": 450000, "eval_re": [1.9417128251287363, 348.2134810160137, 
-0.5262500511826457, 3.111304311606368, -4.388698392299225, -8.424642084263693, 
0.18210295050379846, 0.8337596167974971, 2.681084996812084, 235.9050961389861], 
"eval_len": [23, 175, 12, 15, 12, 22, 22, 18, 18, 130]}

 46%|████▌     | 459999/1000000 [6:40:00<5:58:55, 25.08it/s]global step 460000, trans_decision ep_re 11.539603020983577

{"global_step": 460000, "eval_re": [10.753056875829715, 1.3552826578790296, 
7.86347990226948, -2.6812970727677614, 12.570050971707339, 8.131615310937548, 
0.6068726820785738, -1.2807752658336877, 68.84628261156067, 9.231461536174859], 
"eval_len": [21, 18, 25, 23, 29, 22, 14, 31, 59, 18]}

 47%|████▋     | 469999/1000000 [6:48:40<5:54:29, 24.92it/s]global step 470000, trans_decision ep_re 1.5416264369364203

{"global_step": 470000, "eval_re": [-0.35005714049566095, 5.756466022290788, 
6.24397399547999, 2.9498102283214003, 1.953095704079404, -2.8679801533728835, 
4.709493511648683, 0.23621991579782414, -4.067543749985619, 0.8527860356002772],
"eval_len": [28, 18, 21, 28, 21, 23, 26, 22, 18, 21]}

 48%|████▊     | 479997/1000000 [6:57:40<5:46:32, 25.01it/s]global step 480000, trans_decision ep_re 9.487754382720654

{"global_step": 480000, "eval_re": [5.702061278082, 4.896674636149183, 
-1.6559563105373847, 5.454906650269934, 4.588534538816274, 4.072886738326013, 
1.6100956697474336, 9.776152310476524, 59.0869185276289, 1.3452697882476479], 
"eval_len": [19, 32, 16, 18, 24, 27, 26, 28, 64, 14]}

 49%|████▉     | 489998/1000000 [7:06:14<5:37:15, 25.20it/s]global step 490000, trans_decision ep_re 4.584920194542188

{"global_step": 490000, "eval_re": [12.095424872498304, 3.6774780819162127, 
7.41558005255624, 4.933768207119096, 4.385916584491075, 2.3060236492514923, 
-4.450229627290684, 7.002017091041563, 2.733147818008701, 5.750075215829878], 
"eval_len": [24, 20, 39, 17, 15, 19, 27, 26, 16, 21]}

 50%|████▉     | 499999/1000000 [7:15:00<5:31:19, 25.15it/s]global step 500000, trans_decision ep_re 2.570011383439364

{"global_step": 500000, "eval_re": [3.426070879720442, 1.1209981548909578, 
4.791461468066193, 8.52439612710222, 7.09161895234425, -2.181909053991684, 
6.495680537614763, -3.046459273510533, 3.15229763244006, -3.6740415902830272], 
"eval_len": [17, 15, 17, 25, 27, 15, 18, 19, 27, 23]}

 51%|█████     | 509997/1000000 [7:24:00<5:20:51, 25.45it/s]global step 510000, trans_decision ep_re 33.251618881667405

{"global_step": 510000, "eval_re": [-0.4528234335504762, -0.8572247196500007, 
7.89474732639115, 67.7443854502148, -0.8949583964979494, 9.81042355489339, 
-2.3611663736201214, 1.983186638286353, 0.24274942049559955, 
249.40686934971131], "eval_len": [17, 14, 31, 71, 18, 24, 26, 25, 15, 201]}

 52%|█████▏    | 519998/1000000 [7:32:34<5:16:49, 25.25it/s]global step 520000, trans_decision ep_re 33.90444115298436

{"global_step": 520000, "eval_re": [0.7543086152395666, 5.78433752576692, 
8.467334381533213, 5.379168003900629, 9.543204700322148, 246.52255753298104, 
-0.8182245922107874, -0.6845651897610213, 49.96409116649275, 14.13219938557916],
"eval_len": [14, 21, 21, 18, 27, 195, 12, 16, 76, 34]}

 53%|█████▎    | 529999/1000000 [7:41:21<5:13:50, 24.96it/s]global step 530000, trans_decision ep_re 7.335314454418736

{"global_step": 530000, "eval_re": [6.897568336070934, 1.2772613953232042, 
3.969152048046178, 2.1414960001802643, 2.487482303786881, -3.0951850880524425, 
3.1806067714532054, 9.645306139286, -0.17199522772985446, 47.02145186582299], 
"eval_len": [19, 28, 22, 13, 15, 19, 26, 21, 11, 136]}

 54%|█████▍    | 539999/1000000 [7:50:20<5:06:34, 25.01it/s]global step 540000, trans_decision ep_re 117.19214778575099

{"global_step": 540000, "eval_re": [9.419809212470952, 5.968400663497491, 
-0.36070140902138703, 5.361858054076406, 1.7726211818585713, 
-2.5531108672794876, 1.400505571604286, 6.869749352492469, 25.784586226626708, 
1118.257759871184], "eval_len": [20, 19, 19, 14, 13, 12, 17, 32, 81, 334]}

 55%|█████▍    | 549997/1000000 [7:59:10<5:03:00, 24.75it/s]global step 550000, trans_decision ep_re 10.719015660493069

{"global_step": 550000, "eval_re": [5.820021344021576, 2.7939841157108543, 
4.6740249982885045, 13.431526944919996, -8.449892521404374, 16.56716224632956, 
8.667553376961857, 24.66426521035135, 2.210888824551117, 36.81062206520025], 
"eval_len": [16, 19, 16, 28, 19, 45, 24, 72, 11, 50]}

 56%|█████▌    | 559998/1000000 [8:07:44<4:45:56, 25.65it/s]global step 560000, trans_decision ep_re 11.762000392609588

{"global_step": 560000, "eval_re": [11.424910415538015, 6.549925724740456, 
4.27074565631569, 5.353817863828474, 5.909633693189154, 12.022175602186195, 
-3.0640671358567877, 6.145443169290686, 3.170438319263619, 65.83698061760039], 
"eval_len": [22, 17, 18, 15, 15, 28, 17, 20, 35, 86]}

 57%|█████▋    | 569999/1000000 [8:16:32<4:47:28, 24.93it/s]global step 570000, trans_decision ep_re 139.2673277978963

{"global_step": 570000, "eval_re": [5.889664867434557, 25.242155171709744, 
7.614911618030254, 4.516851083792326, 1264.2341002449004, 4.194244679058602, 
1.833991180337375, 2.774635521155136, 0.5229359546494805, 75.8497876578949], 
"eval_len": [15, 48, 21, 19, 472, 20, 15, 16, 15, 137]}

 58%|█████▊    | 579999/1000000 [8:25:21<4:42:29, 24.78it/s]global step 580000, trans_decision ep_re 43.07714716524848

{"global_step": 580000, "eval_re": [403.68899658612395, 17.089717466630166, 
-2.6181141293446024, 1.2212453361302416, 1.305493741672148, 0.15205068248423392,
0.8265850228520744, 0.852845683936917, 8.005078139499572, 0.24757312250009794], 
"eval_len": [256, 27, 21, 31, 14, 13, 17, 23, 26, 14]}

 59%|█████▉    | 589999/1000000 [8:34:20<4:32:21, 25.09it/s]global step 590000, trans_decision ep_re 14.732843846296126

{"global_step": 590000, "eval_re": [5.359671389952183, 7.969933847032237, 
5.66665448152018, 47.21723999536447, 2.6825799822703313, 9.516153682662479, 
37.82930153015816, 19.24285474770813, 5.787470634502142, 6.056578171790964], 
"eval_len": [18, 22, 15, 59, 15, 25, 57, 32, 25, 15]}

 60%|█████▉    | 599997/1000000 [8:43:10<4:24:22, 25.22it/s]global step 600000, trans_decision ep_re 63.61878497512905

{"global_step": 600000, "eval_re": [1.6693140363294308, 69.84545113136683, 
4.595516293100009, 1.8867276446772423, 220.767629177127, -3.2140219753335364, 
6.677779457014113, 1.94971055497121, 197.94607139478836, 134.06367203724992], 
"eval_len": [14, 133, 14, 28, 215, 20, 33, 28, 138, 109]}

 61%|██████    | 609997/1000000 [8:52:00<4:23:03, 24.71it/s]global step 610000, trans_decision ep_re 56.58750575103712

{"global_step": 610000, "eval_re": [-1.799365039933766, 3.5429831560807346, 
35.317569860403296, 8.04563256239223, 495.30011411614356, -0.5649876722221028, 
10.5334653796095, 4.619560474475309, 11.117236377959726, -0.23715170453725487], 
"eval_len": [16, 18, 52, 20, 205, 13, 25, 15, 46, 12]}

 62%|██████▏   | 619998/1000000 [9:00:35<4:10:07, 25.32it/s]global step 620000, trans_decision ep_re 0.6833436360640561

{"global_step": 620000, "eval_re": [-0.42629607432870575, -2.154767217861677, 
-1.9246490864469838, -1.0310164058124252, -1.6672618360153761, 
-2.3877666533040443, 1.0741301666749206, 3.662653678252652, 9.072043212603104, 
2.616366576879097], "eval_len": [17, 15, 17, 11, 10, 18, 17, 28, 36, 17]}

 63%|██████▎   | 629999/1000000 [9:09:22<4:05:39, 25.10it/s]global step 630000, trans_decision ep_re 53.295163310849674

{"global_step": 630000, "eval_re": [244.49911146744495, 210.7865600263265, 
17.038231305617153, 2.5557652433639286, 6.985043274149836, 36.31633745564321, 
2.9954128318627578, 5.499389258000102, 0.9438854889378927, 5.331896757150393], 
"eval_len": [160, 129, 26, 13, 21, 57, 26, 21, 14, 16]}

 64%|██████▍   | 639999/1000000 [9:18:10<3:57:21, 25.28it/s]global step 640000, trans_decision ep_re 36.431514473644306

{"global_step": 640000, "eval_re": [5.569425313230196, -0.5804626154394865, 
-0.3590348127555602, 185.7051654872059, 1.1576230540755126, 1.7522416005540071, 
-0.11728810025944636, 173.91631178691895, 2.1025640895846065, 
-4.83140106667161], "eval_len": [22, 38, 16, 87, 17, 17, 13, 93, 13, 27]}

 65%|██████▍   | 649997/1000000 [9:27:10<3:56:19, 24.68it/s]global step 650000, trans_decision ep_re 9.863643629782677

{"global_step": 650000, "eval_re": [12.28174584574937, -4.037605483437843, 
2.185400792799638, 4.801971155197923, 4.00830839033782, 2.0386972881590997, 
44.779603728517785, 2.870581766643599, 6.138473543643116, 23.56925927021628], 
"eval_len": [44, 19, 22, 25, 26, 28, 53, 34, 16, 38]}

 66%|██████▌   | 659997/1000000 [9:36:00<3:45:59, 25.08it/s]global step 660000, trans_decision ep_re 4.528120033888576

{"global_step": 660000, "eval_re": [-0.4748811353433815, -0.020361667061678593, 
7.992988021266122, -0.3362780508982204, 4.1082232861620085, 2.8229375967628925, 
25.336218514694494, 10.622014222966587, -3.6187534482984733, 
-1.1509070013645855], "eval_len": [11, 16, 36, 17, 27, 19, 47, 27, 13, 11]}

 67%|██████▋   | 669998/1000000 [9:44:34<3:34:39, 25.62it/s]global step 670000, trans_decision ep_re 23.55945168915715

{"global_step": 670000, "eval_re": [5.4366946986643185, 4.227175609744693, 
-3.778244754808196, 5.774199729401749, 1.7930155459328154, 7.028741068402439, 
8.292904150512149, 199.16820947923745, 4.6376605581716, 3.014160806312474], 
"eval_len": [21, 24, 13, 16, 24, 17, 21, 103, 21, 20]}

 68%|██████▊   | 679999/1000000 [9:53:23<3:33:30, 24.98it/s]global step 680000, trans_decision ep_re 39.267841587683414

{"global_step": 680000, "eval_re": [2.7034946806284936, 3.7967163725444886, 
-2.097413836556029, -2.06095554095481, 41.9423299639587, -4.5645323729273715, 
124.60551744655243, 53.64693924542478, -2.866563481613504, 177.57288339977694], 
"eval_len": [19, 19, 14, 18, 58, 20, 89, 68, 17, 189]}

 69%|██████▉   | 689999/1000000 [10:02:12<3:30:01, 24.60it/s]global step 690000, trans_decision ep_re 19.396302590733747

{"global_step": 690000, "eval_re": [84.8121232808437, -1.6397615317345025, 
7.691146854982376, 8.843347385135514, 23.78004203907402, 5.618552349633194, 
7.933655894640464, 46.254602917830276, 8.291929471782018, 2.377387245150421], 
"eval_len": [80, 11, 22, 19, 49, 19, 17, 88, 23, 20]}

 70%|██████▉   | 699999/1000000 [10:11:10<3:18:38, 25.17it/s]global step 700000, trans_decision ep_re 4.895567398087253

{"global_step": 700000, "eval_re": [8.425226197532393, 0.177408931058781, 
14.969143783859368, -1.051451629380812, 1.679456236577506, -1.662814265281942, 
4.397487998355015, 15.572519735567782, 0.8260104795315848, 5.622686513052853], 
"eval_len": [57, 11, 29, 10, 17, 26, 18, 28, 15, 65]}

 71%|███████   | 709997/1000000 [10:20:00<3:12:02, 25.17it/s]global step 710000, trans_decision ep_re 17.963173602939392

{"global_step": 710000, "eval_re": [25.482677022559027, 128.91187856189683, 
0.6052520212481898, 2.19031806316808, 3.61386596848554, -1.0570842763957695, 
-0.7925725772521967, 5.97279262257385, 6.225969480700651, 8.478639142409726], 
"eval_len": [43, 118, 15, 13, 20, 16, 12, 20, 17, 28]}

 72%|███████▏  | 719997/1000000 [10:28:50<3:07:18, 24.91it/s]global step 720000, trans_decision ep_re 12.3924991025426

{"global_step": 720000, "eval_re": [3.626383694170714, 1.923541505008302, 
-0.49064176414755334, 107.19971661503956, 1.9743725478581837, 
0.2426966893047793, 0.9593562914587211, 0.8529632652475123, 3.087489233122794, 
4.54911294836298], "eval_len": [12, 18, 11, 183, 12, 28, 23, 13, 17, 21]}

 73%|███████▎  | 729998/1000000 [10:37:25<2:56:59, 25.43it/s]global step 730000, trans_decision ep_re 51.385024314056444

{"global_step": 730000, "eval_re": [20.374688282582603, 14.881483373964295, 
1.4871616786188213, 6.187447804661799, 4.555608347295534, -2.0764237138181514, 
452.1928434357349, 5.217194560532474, 1.322877125170226, 9.707362245821939], 
"eval_len": [39, 26, 13, 21, 18, 14, 231, 17, 14, 23]}

 74%|███████▍  | 739998/1000000 [10:46:14<2:50:35, 25.40it/s]global step 740000, trans_decision ep_re 64.71464573217163

{"global_step": 740000, "eval_re": [3.0441164165291843, 8.605482733134952, 
9.540757129276503, -1.7629903656197161, 41.13617218529467, -0.8026714790119917, 
0.5130200832875211, 5.104640481040487, 4.541073121723068, 577.2268570160616], 
"eval_len": [14, 22, 22, 10, 90, 15, 12, 17, 16, 229]}

 75%|███████▍  | 749998/1000000 [10:55:04<2:45:42, 25.14it/s]global step 750000, trans_decision ep_re 75.1572412349056

{"global_step": 750000, "eval_re": [9.919026801427473, 3.0465076165065694, 
-0.8119544630570836, 6.700727431129632, 5.72118315373344, 277.76539153436795, 
442.9392597312463, -2.6013472995637743, 4.215884626830324, 4.677733216435156], 
"eval_len": [21, 27, 17, 28, 26, 167, 224, 13, 26, 17]}

 76%|███████▌  | 759998/1000000 [11:03:54<2:38:29, 25.24it/s]global step 760000, trans_decision ep_re 9.920829053024132

{"global_step": 760000, "eval_re": [7.361785834026678, 22.501781744610902, 
-0.5387890060163008, 11.861474794573127, 0.22332183490410137, 
-1.3021825549607764, 51.39143057881153, 0.40313098264835356, 1.7359866836287399,
5.570349638014974], "eval_len": [26, 51, 17, 21, 11, 18, 89, 20, 16, 19]}

 77%|███████▋  | 769998/1000000 [11:12:43<2:33:55, 24.90it/s]global step 770000, trans_decision ep_re 9.820542191225908

{"global_step": 770000, "eval_re": [0.9916286216021413, 1.127978123984376, 
1.854353066826122, -6.304054395864317, 42.04258698859925, -0.5133909159233937, 
1.2033928695388543, 3.9339204514253407, 10.884550100452696, 42.984457001618], 
"eval_len": [13, 22, 17, 22, 77, 20, 27, 16, 28, 67]}

 78%|███████▊  | 779999/1000000 [11:21:32<2:24:39, 25.35it/s]global step 780000, trans_decision ep_re 56.96212797463055

{"global_step": 780000, "eval_re": [14.832717074521577, 4.4592315033841565, 
6.048198463545867, 2.245522116858851, 4.6643069109612325, 6.813468240001107, 
-0.923332268611538, -4.113852587117686, 528.595414940336, 6.999605352425822], 
"eval_len": [52, 15, 23, 16, 25, 23, 10, 26, 295, 16]}

 79%|███████▉  | 789999/1000000 [11:30:21<2:19:32, 25.08it/s]global step 790000, trans_decision ep_re 168.09161574192575

{"global_step": 790000, "eval_re": [4.206025794210869, 1316.3685052198182, 
0.9318816461026581, 6.35596893343038, 4.182912405170859, 115.29453858006165, 
8.656525094601445, 2.8032575592589364, 211.10864316998976, 11.007899016612477], 
"eval_len": [24, 432, 12, 16, 27, 137, 23, 13, 111, 22]}

 80%|███████▉  | 799999/1000000 [11:39:11<2:11:20, 25.38it/s]global step 800000, trans_decision ep_re 6.850793601217857

{"global_step": 800000, "eval_re": [2.709267408780158, 0.9924032660565816, 
2.5394607416141604, 6.222812153671683, 28.985382070649937, 5.29509787487867, 
1.8297983412482348, 8.626088524660348, 8.005223552758295, 3.302402077860522], 
"eval_len": [16, 15, 21, 16, 53, 41, 15, 23, 23, 19]}

 81%|████████  | 809998/1000000 [11:47:55<2:03:44, 25.59it/s]global step 810000, trans_decision ep_re 3.3545656717531918

{"global_step": 810000, "eval_re": [1.2881484614849743, 5.468897599020246, 
-2.3144040421382597, 4.6472565893644, 2.234173210175215, 12.216059630666406, 
2.977860803133683, -0.5041805619377541, 2.3098719471588445, 5.221973080604166], 
"eval_len": [21, 22, 25, 40, 21, 25, 17, 13, 19, 32]}

 82%|████████▏ | 819999/1000000 [11:56:50<1:58:47, 25.25it/s]global step 820000, trans_decision ep_re 5.731051711741192

{"global_step": 820000, "eval_re": [5.168039091368814, 9.221314053505967, 
4.217288145262586, 2.1209665593586213, 16.400610075931972, 11.450849379372368, 
1.5419586933561167, 0.5692971352042182, 5.992538561341606, 0.6276554227096515], 
"eval_len": [29, 20, 25, 15, 39, 29, 15, 12, 17, 26]}

 83%|████████▎ | 829998/1000000 [12:05:23<1:49:47, 25.81it/s]global step 830000, trans_decision ep_re 43.76460092083492

{"global_step": 830000, "eval_re": [11.792181819440296, -1.02414604741241, 
252.95360631413337, 2.381873460112268, 30.39411002753971, 5.900539505609511, 
12.725951406832976, 1.0107796600829744, 6.7942401426294285, 114.71687291938116],
"eval_len": [25, 11, 188, 25, 44, 27, 21, 31, 19, 97]}

 84%|████████▍ | 839999/1000000 [12:14:20<1:46:14, 25.10it/s]global step 840000, trans_decision ep_re 54.91736133225049

{"global_step": 840000, "eval_re": [307.12181332290686, 2.6735077930428495, 
219.41531983354028, 12.588708870514559, 3.707123582836773, -2.6723933514121017, 
-0.028828469688936063, 0.013263564506623332, 1.1892007587353999, 
5.165897417522619], "eval_len": [173, 22, 155, 39, 13, 25, 24, 27, 13, 15]}

 85%|████████▍ | 849998/1000000 [12:22:54<1:38:40, 25.34it/s]global step 850000, trans_decision ep_re 9.639820971196391

{"global_step": 850000, "eval_re": [4.107412985180821, 4.858366644817361, 
17.841063373094396, 9.616621072993858, 20.142311411644386, -0.8769058378140756, 
3.5887856363367963, 0.2637581088583476, 3.1448709120413754, 33.71192540481064], 
"eval_len": [23, 20, 39, 27, 39, 15, 17, 15, 21, 44]}

 86%|████████▌ | 859999/1000000 [12:31:50<1:33:05, 25.06it/s]global step 860000, trans_decision ep_re 68.93942980953217

{"global_step": 860000, "eval_re": [3.1577649058602737, 3.1104347963922487, 
32.989235143196936, 3.343417813832527, 7.43791209479884, -6.49697912537638, 
601.8094384964659, 25.13096909850003, 9.44007212348877, 9.472032748162677], 
"eval_len": [18, 24, 60, 34, 22, 22, 264, 49, 25, 26]}

 87%|████████▋ | 869997/1000000 [12:40:26<1:25:35, 25.32it/s]global step 870000, trans_decision ep_re 3.0526176178196813

{"global_step": 870000, "eval_re": [-0.9115021977538572, -1.6059486234351221, 
3.6845774072730375, 3.5420137057635377, 0.03302223494895801, 3.9979927104052284,
15.426275192812989, 5.324074270224493, 0.48643486131085584, 0.5492366166466942],
"eval_len": [14, 11, 27, 24, 12, 21, 29, 19, 23, 17]}

 88%|████████▊ | 879999/1000000 [12:49:12<1:19:52, 25.04it/s]global step 880000, trans_decision ep_re 49.7448836873159

{"global_step": 880000, "eval_re": [14.605149214265372, 1.7254112010767069, 
-2.3388604320840782, 1.4791767033947676, 4.882800932166441, 10.32493219824935, 
456.749787943928, -0.4417342771560339, 2.957387258219191, 7.504786131099245], 
"eval_len": [23, 18, 18, 11, 18, 19, 182, 13, 23, 18]}

 89%|████████▉ | 889999/1000000 [12:58:10<1:12:20, 25.34it/s]global step 890000, trans_decision ep_re 1.1363149356838327

{"global_step": 890000, "eval_re": [1.5423191267012948, 3.0390355412538423, 
2.376174834359643, -1.5070262644541295, 4.124657096096667, -2.0361549733642805, 
0.3694563869147726, 2.294991565017615, -0.11804594171440537, 
1.2777419860273056], "eval_len": [19, 25, 15, 14, 26, 20, 15, 23, 11, 17]}

 90%|████████▉ | 899998/1000000 [13:06:44<1:06:06, 25.21it/s]global step 900000, trans_decision ep_re 10.136203364087352

{"global_step": 900000, "eval_re": [43.09475828370064, 7.915106389042541, 
7.877242683604538, 5.092473209567316, -2.555993863587562, 11.097241336926043, 
2.420538323648603, 5.919205624281817, 13.234669872742103, 7.266791780947471], 
"eval_len": [73, 24, 19, 17, 10, 38, 15, 30, 38, 40]}

 91%|█████████ | 909999/1000000 [13:15:40<59:54, 25.04it/s]global step 910000, trans_decision ep_re 9.052437923655528

{"global_step": 910000, "eval_re": [58.02135848024426, -0.9266842094461331, 
-2.3637088207485446, 4.569445231342933, 0.19650141895990436, 11.571932470907859,
15.102282508383395, 3.990758221809905, 1.597444513473265, -1.2349505783715773], 
"eval_len": [83, 22, 17, 15, 26, 25, 41, 23, 18, 12]}

 92%|█████████▏| 919998/1000000 [13:24:14<52:46, 25.27it/s]global step 920000, trans_decision ep_re 15.02077824123091

{"global_step": 920000, "eval_re": [-2.5751165771188975, 3.900134799558031, 
3.0561223917653013, 6.19886989895286, 2.232331843103762, 5.0579913473587945, 
-0.4062472677383676, 115.20173710273477, 4.947324893260323, 12.594633980432542],
"eval_len": [20, 15, 24, 35, 14, 21, 17, 81, 25, 21]}

 93%|█████████▎| 929998/1000000 [13:33:04<45:58, 25.38it/s]global step 930000, trans_decision ep_re 1.9445356032283114

{"global_step": 930000, "eval_re": [7.767004067307436, -0.1340726598397632, 
1.2062271809460698, 2.485480130409223, 0.2159344978063826, 4.576357847442498, 
-2.8894547745270414, 9.14933745094792, -5.414007354978792, 2.4825496467691845], 
"eval_len": [27, 23, 15, 11, 34, 25, 21, 23, 14, 19]}

 94%|█████████▍| 939997/1000000 [13:42:10<41:20, 24.19it/s]global step 940000, trans_decision ep_re 7.44209411115852

{"global_step": 940000, "eval_re": [0.35220908330347644, 3.2197157397800136, 
1.0051232433056194, 18.326276466270826, 32.53234755519456, 3.796137994108081, 
1.4144124531808622, 2.89130549524217, 3.214021599754784, 7.669391481444808], 
"eval_len": [13, 13, 28, 63, 75, 18, 35, 23, 17, 19]}

 95%|█████████▍| 949999/1000000 [13:50:52<33:19, 25.01it/s]global step 950000, trans_decision ep_re 111.44040340097952

{"global_step": 950000, "eval_re": [319.3542142608649, 542.8201063285027, 
-0.8360332992011226, 5.163982938397214, 219.60364992338077, 11.50567224885861, 
5.416526904748648, 10.17530556814879, -3.2947043425608564, 4.495313478655595], 
"eval_len": [149, 212, 14, 14, 269, 27, 18, 22, 26, 20]}

 96%|█████████▌| 959997/1000000 [14:00:00<26:37, 25.05it/s]global step 960000, trans_decision ep_re 81.99348151478657

{"global_step": 960000, "eval_re": [2.614617837944304, -0.6786246817875086, 
186.78936182397197, -1.7589199149733645, 7.324164932218715, 572.7825443146551, 
-1.5367674821809505, 47.52352961132485, 1.632686703738518, 5.242222002954131], 
"eval_len": [15, 17, 182, 24, 19, 285, 14, 54, 15, 20]}

 97%|█████████▋| 969997/1000000 [14:08:50<19:53, 25.13it/s]global step 970000, trans_decision ep_re 6.51706419459529

{"global_step": 970000, "eval_re": [-2.760990878614244, 27.450217396366213, 
1.9819760535590216, 6.558134215716233, 2.646110598646003, 6.380552651854197, 
0.31713745210014904, 4.293804800753899, 0.1447007742317799, 18.15899888133964], 
"eval_len": [26, 40, 13, 17, 16, 22, 18, 21, 39, 40]}

 98%|█████████▊| 979999/1000000 [14:17:40<13:19, 25.03it/s]global step 980000, trans_decision ep_re 7.264256953456008

{"global_step": 980000, "eval_re": [4.257672948115158, -3.909268187224011, 
-2.8647701766023346, 49.260378272327785, 6.902062466695988, 2.7657347924105493, 
3.89383828582402, 6.977342486021171, 5.809805832263196, -0.4502271852714417], 
"eval_len": [20, 16, 21, 58, 17, 13, 17, 24, 29, 19]}

 99%|█████████▉| 989998/1000000 [14:26:25<06:34, 25.33it/s]global step 990000, trans_decision ep_re 26.527811690578016

{"global_step": 990000, "eval_re": [1.1987462124626136, 158.08943589290126, 
3.3364273110503624, 90.05012694609393, 3.8969587880521543, -1.9859023856568658, 
3.4038158075246296, -5.345862237926697, 4.10821588016313, 8.52615469111569], 
"eval_len": [12, 155, 14, 78, 21, 21, 16, 23, 24, 24]}

100%|█████████▉| 999997/1000000 [14:35:30<00:00, 24.97it/s]global step 1000000, trans_decision ep_re 135.1303548424697

{"global_step": 1000000, "eval_re": [1.368474830196433, 5.159333281198402, 
-0.549989427233315, -0.3768206299644082, 6.208830307096297, 3.3998775873945766, 
9.424368588372083, 2.3508725494140283, 1321.4188251964306, 2.899776141792405], 
"eval_len": [25, 18, 13, 22, 18, 22, 21, 21, 569, 39]}

100%|██████████| 1000000/1000000 [14:35:33<00:00, 19.04it/s]
