
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.25
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9998/1000000 [04:40<10:41:15, 25.73it/s]global step 10000, trans_decision ep_re -6.222268836472166

{"global_step": 10000, "eval_re": [-24.34695663726578, -11.909191040972088, 
-14.539520479967265, -8.002098604271458, -11.715969343430576, 
-6.061403722469477, -5.120860350081501, 0.058092770441391606, 19.21416483175265,
0.2010542115424402], "eval_len": [214, 123, 65, 108, 58, 65, 101, 26, 74, 45]}

  2%|▏         | 19999/1000000 [13:20<10:36:49, 25.65it/s]global step 20000, trans_decision ep_re -112.24801301001851

{"global_step": 20000, "eval_re": [5.537101518300593, -152.7170945426179, 
-421.8567130144558, -0.39492012376206476, -43.619323459007354, 
-505.0978852568894, -77.78023072709921, -15.738925005909806, 
-4.4387169657652645, 93.62657747702103], "eval_len": [110, 259, 1000, 12, 140, 
924, 202, 46, 72, 145]}

  3%|▎         | 29999/1000000 [22:10<10:30:01, 25.66it/s]global step 30000, trans_decision ep_re -34.70313007405126

{"global_step": 30000, "eval_re": [-12.287142884816003, -233.82379058364668, 
-19.42009003612567, -2.9242696668398764, -8.26230752934942, -9.716209289774303, 
-24.356673624949376, -34.72092231044733, -16.213794745083106, 
14.693899930519184], "eval_len": [46, 1000, 85, 74, 66, 35, 93, 91, 63, 34]}

  4%|▍         | 39999/1000000 [31:00<10:35:37, 25.17it/s]global step 40000, trans_decision ep_re -50.94207223331762

{"global_step": 40000, "eval_re": [-9.080688132564752, -138.25407038781125, 
-65.3947507749086, -51.62791206572987, 14.746649627934467, 12.181085313544207, 
-74.4871667009736, -141.60915707079343, 9.320148447515537, -65.21486058938889], 
"eval_len": [32, 1000, 148, 351, 67, 32, 167, 1000, 27, 1000]}

  5%|▍         | 49999/1000000 [40:00<10:21:32, 25.47it/s]global step 50000, trans_decision ep_re -34.99893375294767

{"global_step": 50000, "eval_re": [-7.733905706242062, -13.315351126831775, 
-13.048235981463858, 0.5199360513624338, -9.0426739699899, -163.37644935551387, 
2.2733369767523754, -11.26939091466307, -161.67198993325883, 26.67538643037182],
"eval_len": [95, 139, 84, 46, 40, 1000, 19, 161, 299, 188]}

  6%|▌         | 59998/1000000 [48:40<10:10:37, 25.66it/s]global step 60000, trans_decision ep_re -10.650956511532746

{"global_step": 60000, "eval_re": [-7.405458259128013, -38.02339302914158, 
39.48242609007077, -16.912755313421073, -4.230230265269861, -85.8629774840129, 
-10.431781252917602, -8.279995988447109, 0.21186086850647, 24.942739518433463], 
"eval_len": [13, 270, 46, 76, 80, 1000, 24, 82, 15, 49]}

  7%|▋         | 69997/1000000 [57:30<10:11:29, 25.35it/s]global step 70000, trans_decision ep_re -35.54708023293572

{"global_step": 70000, "eval_re": [-28.567121953093448, -12.17358451629587, 
-52.62567095030109, -13.119286469942397, 49.9923070964211, 0.17082505795543856, 
-92.93948406553048, 16.127061771189005, -116.39376351280212, 
-105.94208478695732], "eval_len": [131, 33, 348, 68, 199, 60, 258, 30, 315, 
698]}

  8%|▊         | 79998/1000000 [1:06:20<9:56:57, 25.69it/s]global step 80000, trans_decision ep_re -13.593034213695901

{"global_step": 80000, "eval_re": [-51.708103084532574, -20.59322398129551, 
1.6793612751939455, -5.624861289776561, 8.155305788073107, -77.96092593432014, 
-13.538778056997206, 13.078974031987865, 14.26656056232239, 
-3.6846514476143315], "eval_len": [1000, 105, 116, 214, 31, 120, 84, 39, 30, 
74]}

  9%|▉         | 89999/1000000 [1:15:00<9:54:25, 25.51it/s]global step 90000, trans_decision ep_re -21.574282087197318

{"global_step": 90000, "eval_re": [2.0965839627175322, -19.242098545272697, 
0.42040443114319337, -80.51554079782784, -3.332714845972988, 12.239536631140275,
2.0955308676455884, 1.4366245197773755, -38.12696230424552, -92.8141847910781], 
"eval_len": [37, 123, 65, 1000, 12, 59, 19, 25, 134, 1000]}

 10%|▉         | 99999/1000000 [1:23:50<9:52:18, 25.32it/s]global step 100000, trans_decision ep_re -14.70817955541134

{"global_step": 100000, "eval_re": [-54.91940075357743, 11.405223115311108, 
-10.378643439217628, 8.692969887825122, -45.78078928752967, 9.432196145208287, 
9.017198249973061, -106.42329219630845, 15.940254380963882, 15.932488343238319],
"eval_len": [315, 127, 36, 23, 131, 79, 13, 1000, 52, 170]}

 11%|█         | 109997/1000000 [1:32:40<9:38:44, 25.63it/s]global step 110000, trans_decision ep_re -9.114970927182593

{"global_step": 110000, "eval_re": [-1.120359716708384, 12.168476058430494, 
-22.499415265215017, 54.826978557946134, -153.58376153939923, 
-29.265265639263784, 9.194906827263676, 34.58138182456708, -15.16510161277225, 
19.712451233325364], "eval_len": [148, 70, 47, 303, 1000, 1000, 32, 251, 31, 
76]}

 12%|█▏        | 119997/1000000 [1:41:30<9:33:00, 25.60it/s]global step 120000, trans_decision ep_re -26.15159068298765

{"global_step": 120000, "eval_re": [21.268069963439174, -62.91268217749554, 
6.82400224158545, -63.88154247762426, -16.553131011507645, -133.24795925741736, 
-16.223752925382136, 2.1825881302556835, 3.24874825122694, -2.220247566956754], 
"eval_len": [69, 1000, 15, 1000, 54, 1000, 51, 19, 38, 215]}

 13%|█▎        | 129999/1000000 [1:50:20<9:34:52, 25.22it/s]global step 130000, trans_decision ep_re -6.773323467939967

{"global_step": 130000, "eval_re": [-26.96181261363525, -41.47925403096169, 
-37.68250040417293, -17.70037769707459, -31.85218023532063, 69.97903553513093, 
58.20809646186034, 7.350178515155347, -47.068221096983045, -0.5261991133981698],
"eval_len": [416, 158, 147, 36, 1000, 265, 415, 143, 375, 56]}

 14%|█▍        | 139999/1000000 [1:59:10<9:24:18, 25.40it/s]global step 140000, trans_decision ep_re -8.396482280112998

{"global_step": 140000, "eval_re": [25.875239352572653, -13.374344903969757, 
28.994472038560826, 15.62241752685757, -83.2344364102564, -42.94970012931222, 
40.204065575056916, 60.857204209459475, -127.95339862350315, 
11.993658563404098], "eval_len": [215, 352, 75, 49, 1000, 134, 388, 363, 1000, 
41]}

 15%|█▍        | 149999/1000000 [2:08:10<9:18:01, 25.39it/s]global step 150000, trans_decision ep_re -10.343300685934416

{"global_step": 150000, "eval_re": [-17.85911229862334, 4.856843037411347, 
-104.60637300098868, 41.487345156643016, 21.59017214055935, 46.699743156315165, 
-14.978106300503027, 6.611272696437136, 13.437317920701634, 
-100.67210936729676], "eval_len": [658, 27, 1000, 205, 165, 211, 104, 41, 66, 
1000]}

 16%|█▌        | 159998/1000000 [2:17:00<9:02:03, 25.83it/s]global step 160000, trans_decision ep_re -24.536174613025096

{"global_step": 160000, "eval_re": [-78.83653746539255, 30.983765927009195, 
-19.996820124921246, -142.03735602147634, -54.338636750549796, 
-79.90696061220136, 75.39976362108965, 19.961872389300712, -10.066717041544091, 
13.47587994843489], "eval_len": [1000, 145, 186, 1000, 1000, 1000, 122, 173, 
147, 59]}

 17%|█▋        | 169998/1000000 [2:25:50<8:59:09, 25.66it/s]global step 170000, trans_decision ep_re -26.071247635434567

{"global_step": 170000, "eval_re": [1.6248944273477097, -118.74991819675745, 
-4.6270379985261245, -65.4382903026651, -72.1215380617679, -3.995076113522338, 
-13.1563308630083, 6.23927026672469, 20.00478208861187, -10.493231600782742], 
"eval_len": [92, 1000, 47, 1000, 1000, 18, 188, 34, 360, 47]}

 18%|█▊        | 179999/1000000 [2:34:50<8:52:35, 25.66it/s]global step 180000, trans_decision ep_re -36.6325954033544

{"global_step": 180000, "eval_re": [-139.98210799317857, 10.753823526887068, 
3.7294227813706793, -102.4673059743763, 35.464620101111095, -8.333742252611513, 
-30.152760622171506, 12.265973750517313, 27.708717379108403, 
-175.31259473020063], "eval_len": [1000, 35, 289, 1000, 86, 32, 459, 59, 211, 
1000]}

 19%|█▉        | 189997/1000000 [2:43:40<8:49:07, 25.51it/s]global step 190000, trans_decision ep_re 7.950133140790447

{"global_step": 190000, "eval_re": [3.9342920874916816, 18.005862207008164, 
4.655888627182846, 7.975211243303835, -38.198884837828615, 48.49648315104864, 
-4.937414880481619, 21.77475122440665, 19.919432935842103, -2.1242903500692094],
"eval_len": [12, 24, 119, 15, 80, 103, 136, 78, 80, 506]}

 20%|█▉        | 199998/1000000 [2:52:20<8:38:31, 25.71it/s]global step 200000, trans_decision ep_re -24.043476298640293

{"global_step": 200000, "eval_re": [-124.27569047456572, 1.0193299388931205, 
5.916491597001098, 23.958080053381394, -13.729170035948451, -28.81133343961135, 
-2.48090292376924, 5.783355882060221, -117.65535591719275, 9.840432333348758], 
"eval_len": [1000, 30, 305, 62, 96, 45, 47, 75, 1000, 46]}

 21%|██        | 209999/1000000 [3:01:10<8:34:32, 25.59it/s]global step 210000, trans_decision ep_re 18.455922713923222

{"global_step": 210000, "eval_re": [36.45405844874934, -5.249570611988748, 
75.34325108806298, 27.50356550617178, -10.64432715877803, 34.997406116105935, 
-2.4266846241685522, 2.6896672645279667, 13.179361045464333, 
12.712500065085186], "eval_len": [195, 198, 583, 52, 172, 47, 25, 50, 49, 44]}

 22%|██▏       | 219999/1000000 [3:10:00<8:26:50, 25.65it/s]global step 220000, trans_decision ep_re -17.335821476549587

{"global_step": 220000, "eval_re": [-43.845670299775236, 12.637329484701143, 
-42.295026459459606, -18.77044434629151, 30.81474240884004, -76.34455407115809, 
18.86501264869604, 1.5228929087878549, -29.107907002121692, 
-26.834590037714808], "eval_len": [1000, 31, 130, 33, 80, 1000, 28, 38, 157, 
242]}

 23%|██▎       | 229999/1000000 [3:18:40<8:20:54, 25.62it/s]global step 230000, trans_decision ep_re 26.890181083990463

{"global_step": 230000, "eval_re": [-1.071028832107451, 11.36014944930536, 
-4.66651301512604, 11.874599910297567, -5.771184739934736, 26.768715617738692, 
182.20601923090578, -0.18834543043860041, 20.612815556212784, 
27.776583093051233], "eval_len": [279, 42, 57, 38, 44, 34, 430, 53, 94, 85]}

 24%|██▍       | 239998/1000000 [3:27:30<8:07:33, 25.98it/s]global step 240000, trans_decision ep_re -11.978050603012948

{"global_step": 240000, "eval_re": [-69.15835250976006, 112.17926761935038, 
-2.8841348875273978, -41.62439128043144, -14.718807823370717, 
-75.71350597869836, 13.74004594047939, 17.224706636470554, -51.17093251969656, 
-7.654401226945271], "eval_len": [1000, 277, 83, 1000, 145, 523, 63, 36, 1000, 
85]}

 25%|██▍       | 249999/1000000 [3:36:20<8:11:43, 25.42it/s]global step 250000, trans_decision ep_re -8.917317308559287

{"global_step": 250000, "eval_re": [12.17718287484464, 0.04308262088003478, 
-46.55447793663447, 23.15905468861353, 6.74734922817743, 25.98758197814139, 
6.430488491062293, -90.89099516539639, -23.5733959031756, -2.6990439621057165], 
"eval_len": [27, 32, 1000, 79, 71, 47, 185, 1000, 89, 1000]}

 26%|██▌       | 259999/1000000 [3:45:10<8:04:21, 25.46it/s]global step 260000, trans_decision ep_re 3.8560311168035026

{"global_step": 260000, "eval_re": [22.378940143664014, -13.80959492601308, 
8.598473194104887, 35.656970374329376, 13.936771674083264, 8.685079669426273, 
-27.80908789626769, 7.7814420692829405, 53.95295131903961, -70.81163445361456], 
"eval_len": [871, 88, 75, 306, 98, 16, 1000, 9, 482, 1000]}

 27%|██▋       | 269998/1000000 [3:54:10<7:48:28, 25.97it/s]global step 270000, trans_decision ep_re 3.7366621849257626

{"global_step": 270000, "eval_re": [-90.31656491428411, 2.258102656850147, 
-15.80363147592335, 0.43617921029817264, 41.02553412240672, -82.88880275196428, 
-13.60655196624198, 158.2485687185119, 15.116088617628769, 22.897699631975645], 
"eval_len": [1000, 37, 126, 33, 1000, 325, 31, 423, 44, 84]}

 28%|██▊       | 279998/1000000 [4:03:00<7:46:55, 25.70it/s]global step 280000, trans_decision ep_re -3.6829947752505476

{"global_step": 280000, "eval_re": [6.559151815442441, 54.00347188022139, 
51.656561472331425, 11.030276910817964, -53.33826690875985, 48.536515091352854, 
-149.65842564381893, -45.207524989516486, 24.836289933131624, 
14.752002686292089], "eval_len": [36, 289, 1000, 23, 1000, 113, 1000, 1000, 154,
43]}

 29%|██▉       | 289999/1000000 [4:11:50<7:44:52, 25.45it/s]global step 290000, trans_decision ep_re 12.27630587341115

{"global_step": 290000, "eval_re": [1.2588152190039423, -20.16801071578036, 
11.555887179858894, -9.706501461140956, -62.5794305425914, 77.171383239843, 
31.24839995688091, -27.731564431056057, -0.7528628261962083, 
122.46694311528974], "eval_len": [51, 1000, 41, 37, 1000, 392, 86, 85, 30, 661]}

 30%|██▉       | 299999/1000000 [4:20:50<7:38:09, 25.46it/s]global step 300000, trans_decision ep_re 0.23024192606482147

{"global_step": 300000, "eval_re": [21.164388883446456, 41.77222774673589, 
2.1815721132824937, -2.441697451040258, -31.51232722124813, -37.71176468097174, 
4.78224110241028, 16.697106046297172, -49.11873281400369, 36.48940553573974], 
"eval_len": [1000, 305, 807, 195, 1000, 168, 61, 52, 334, 1000]}

 31%|███       | 309999/1000000 [4:29:40<7:29:02, 25.61it/s]global step 310000, trans_decision ep_re 8.576936742692709

{"global_step": 310000, "eval_re": [-10.39042285953189, 15.279297814311832, 
-72.86835935248483, 28.834990335939274, 25.85407720197092, 17.650761246362414, 
-36.04210056574948, 0.1292696660723479, -2.359431405142229, 119.68128534517874],
"eval_len": [206, 32, 1000, 172, 124, 88, 1000, 135, 19, 525]}

 32%|███▏      | 319998/1000000 [4:38:30<7:16:22, 25.97it/s]global step 320000, trans_decision ep_re -3.8841768589136962

{"global_step": 320000, "eval_re": [15.557599542088605, 43.60635000186872, 
-0.5438703663745255, -78.94585349528745, 10.392674285431573, 5.3699463209937335,
-1.9899680839479026, 47.407810414661085, -68.67727988288928, 
-11.019177325681527], "eval_len": [40, 105, 29, 1000, 17, 47, 6, 304, 1000, 
1000]}

 33%|███▎      | 329999/1000000 [4:47:20<7:18:17, 25.48it/s]global step 330000, trans_decision ep_re -8.86111140253723

{"global_step": 330000, "eval_re": [11.544469653692724, -100.43028967105025, 
-7.030684964516918, -43.780950489130745, 84.97583025628396, -35.41880234847261, 
-11.128612458450789, -11.972522031924818, 5.222833744256593, 19.40761428394057],
"eval_len": [132, 1000, 1000, 1000, 211, 1000, 67, 45, 64, 247]}

 34%|███▍      | 339997/1000000 [4:56:20<7:11:27, 25.49it/s]global step 340000, trans_decision ep_re 27.75571497929304

{"global_step": 340000, "eval_re": [9.861031517091051, -22.066588576454077, 
13.210576171757051, 13.091596707790925, 3.741082703097257, 40.22448877053596, 
4.79246887739904, 171.2383043201506, 38.41134068150431, 5.052848620058219], 
"eval_len": [31, 259, 26, 113, 16, 46, 16, 473, 214, 14]}

 35%|███▍      | 349999/1000000 [5:05:00<7:05:01, 25.49it/s]global step 350000, trans_decision ep_re 12.708014488723876

{"global_step": 350000, "eval_re": [-47.45566530790739, 54.407435097074995, 
1.3519788233249779, -4.587669851457199, -5.95211057120987, 26.51050418128545, 
-15.410961872611075, 106.61076026539372, -10.918968211739443, 
22.524842335084585], "eval_len": [1000, 1000, 241, 1000, 162, 59, 197, 1000, 74,
212]}

 36%|███▌      | 359997/1000000 [5:14:00<6:55:53, 25.65it/s]global step 360000, trans_decision ep_re 34.6553110660673

{"global_step": 360000, "eval_re": [-38.19892767951246, 68.77608848738322, 
86.88367543271443, 6.968400305369794, 23.22215809616581, 20.96942167811992, 
12.097811620759108, 10.116816784132434, 85.60733045650227, 70.1103354790385], 
"eval_len": [45, 874, 1000, 35, 153, 56, 134, 19, 1000, 288]}

 37%|███▋      | 369997/1000000 [5:22:50<6:46:17, 25.84it/s]global step 370000, trans_decision ep_re 12.895340808487958

{"global_step": 370000, "eval_re": [2.5832232485006976, -7.212527580977826, 
49.71011627637868, 8.518944287228306, -85.36136089776352, -43.71922968273317, 
-14.298715926584558, 167.50034791289028, 7.939547436565308, 43.293063011375374],
"eval_len": [42, 23, 108, 17, 1000, 198, 59, 466, 296, 77]}

 38%|███▊      | 379999/1000000 [5:31:30<6:42:32, 25.67it/s]global step 380000, trans_decision ep_re 14.688378555843812

{"global_step": 380000, "eval_re": [-27.470748775259615, -8.307280442277209, 
11.184888685650813, -10.150248516211628, 45.05425604408286, 3.047879646473123, 
-28.305518879467556, 148.83117707505951, -90.8128848093238, 103.81226552971162],
"eval_len": [181, 55, 90, 1000, 74, 175, 399, 428, 1000, 345]}

 39%|███▉      | 389998/1000000 [5:40:20<6:31:58, 25.94it/s]global step 390000, trans_decision ep_re 3.9669680681921795

{"global_step": 390000, "eval_re": [-7.123979821573272, 37.21918192196041, 
70.53652459443566, 9.556680199814844, 18.23627714888206, 29.35848852249085, 
6.97697556231447, -22.095275993904405, -53.398382553165945, -49.59680889933288],
"eval_len": [94, 77, 240, 31, 26, 135, 38, 31, 335, 109]}

 40%|███▉      | 399998/1000000 [5:49:00<6:21:53, 26.19it/s]global step 400000, trans_decision ep_re 13.789289341944112

{"global_step": 400000, "eval_re": [3.3709750934884872, -23.195519643108778, 
10.181074937971465, 0.4312184320944017, -33.94914796594829, 36.9697415014245, 
32.59847676600701, 23.254498224576725, -8.744373297995567, 96.97594937093119], 
"eval_len": [21, 159, 52, 48, 1000, 62, 82, 206, 436, 547]}

 41%|████      | 409998/1000000 [5:57:33<6:18:28, 25.98it/s]global step 410000, trans_decision ep_re -10.52830676451383

{"global_step": 410000, "eval_re": [-12.491997455791223, -9.347985944731246, 
-28.738290804779176, -8.271203072930788, 1.777960296939846, 5.1612940129934, 
9.841217571451878, -30.172230748640207, -24.167463610725182, -8.8743678889256], 
"eval_len": [248, 30, 89, 69, 67, 46, 103, 200, 147, 166]}

 42%|████▏     | 419997/1000000 [6:06:30<6:11:25, 26.03it/s]global step 420000, trans_decision ep_re 11.013491737056919

{"global_step": 420000, "eval_re": [-8.935363698279593, 62.17950893577548, 
18.369915919278714, 13.92492006054255, 9.268235833083672, 7.319254600222557, 
14.799584736831312, -38.862470748925176, 27.889128666338042, 4.182203065701639],
"eval_len": [27, 174, 485, 30, 73, 82, 27, 1000, 126, 172]}

 43%|████▎     | 429998/1000000 [6:15:10<6:04:29, 26.06it/s]global step 430000, trans_decision ep_re -20.819156448932567

{"global_step": 430000, "eval_re": [-31.73835559961608, -49.27448902931462, 
47.910398773043305, -17.97571283664159, -58.94322777836131, -72.15398693995226, 
10.767643685345814, -67.84818996822251, 8.136296214588771, 22.928058989804818], 
"eval_len": [51, 126, 146, 433, 1000, 1000, 21, 1000, 31, 276]}

 44%|████▍     | 439999/1000000 [6:24:00<6:01:55, 25.79it/s]global step 440000, trans_decision ep_re 8.60557237654486

{"global_step": 440000, "eval_re": [51.963759121952286, -4.8295161206134605, 
30.470858632613144, -2.7376073102538774, -20.096247482030886, 45.1347930533057, 
29.745270274789192, -49.29193876270894, 6.093824105995541, -0.3974717476000835],
"eval_len": [1000, 29, 1000, 43, 1000, 131, 502, 1000, 47, 32]}

 45%|████▍     | 449998/1000000 [6:32:50<5:50:50, 26.13it/s]global step 450000, trans_decision ep_re 3.0688680530875776

{"global_step": 450000, "eval_re": [21.774842953029438, 10.060732158923745, 
44.68406171392471, -22.01641381639423, 12.888123014077046, 15.677693595393816, 
7.296990724139574, 41.90746019071847, -94.44749279719257, -7.137317205744207], 
"eval_len": [167, 68, 32, 42, 51, 55, 56, 93, 448, 36]}

 46%|████▌     | 459998/1000000 [6:41:30<5:41:02, 26.39it/s]global step 460000, trans_decision ep_re -14.606394473898874

{"global_step": 460000, "eval_re": [-137.92498560386846, 31.676886856192763, 
23.579694191475884, 11.807088353357265, -9.094906421560879, -5.397595398076997, 
-21.73169910387231, 10.36231035643352, -50.383623021911056, 1.0428850528415408],
"eval_len": [350, 68, 1000, 33, 152, 1000, 36, 59, 97, 19]}

 47%|████▋     | 469998/1000000 [6:50:20<5:38:19, 26.11it/s]global step 470000, trans_decision ep_re 10.690973840436758

{"global_step": 470000, "eval_re": [18.887991464671455, 5.6290943617391465, 
12.317115010541434, -24.589687906967935, -40.309620361085315, 9.78006030266114, 
91.26443988008424, -52.67655919049021, 27.696819689192218, 58.910085154021395], 
"eval_len": [93, 22, 36, 106, 558, 1000, 254, 1000, 96, 284]}

 48%|████▊     | 479999/1000000 [6:59:00<5:36:22, 25.76it/s]global step 480000, trans_decision ep_re 3.4317454357956594

{"global_step": 480000, "eval_re": [11.107915397613937, 24.09872875787177, 
25.57888580991711, 9.0362336636029, -33.75327983635597, -14.792761039334868, 
22.980864071146033, 12.74505030798771, 5.112956720965986, -27.79713949545801], 
"eval_len": [110, 126, 123, 48, 64, 264, 126, 264, 71, 320]}

 49%|████▉     | 489999/1000000 [7:07:50<5:31:48, 25.62it/s]global step 490000, trans_decision ep_re -4.683901345502449

{"global_step": 490000, "eval_re": [-48.96213003877096, -4.354938515105475, 
18.431478674576997, 5.00949408682377, 13.95181648972565, 12.645749138262524, 
-13.889123287331154, -4.962909321922341, 9.183606841451665, -33.89205752273517],
"eval_len": [1000, 29, 20, 92, 263, 199, 86, 44, 88, 1000]}

 50%|████▉     | 499999/1000000 [7:16:30<5:24:21, 25.69it/s]global step 500000, trans_decision ep_re 7.184966151113146

{"global_step": 500000, "eval_re": [-63.11908037772233, 23.985894460704504, 
-11.85355169614072, -2.3920954995497907, -49.983058540265304, 
-10.409701738194403, -14.626081726393924, 77.70515919484993, 37.549285872037544,
84.99289156180595], "eval_len": [139, 208, 323, 39, 1000, 57, 122, 298, 191, 
396]}

 51%|█████     | 509999/1000000 [7:25:10<5:15:03, 25.92it/s]global step 510000, trans_decision ep_re -0.9332835023954968

{"global_step": 510000, "eval_re": [-14.849616248493527, 43.37493641671488, 
11.457853133027317, 10.219016532362534, -91.06593320481895, -6.979947400564247, 
14.417223663518232, 14.197759552012826, 16.383043632780364, -6.487171100494406],
"eval_len": [47, 221, 32, 27, 361, 151, 47, 86, 207, 64]}

 52%|█████▏    | 519999/1000000 [7:33:50<5:10:01, 25.80it/s]global step 520000, trans_decision ep_re 2.839798659913123

{"global_step": 520000, "eval_re": [-1.5884431513169823, -13.23518656380222, 
-31.564338446016674, 10.79821948534956, 5.526597066053072, -11.880493868732428, 
2.3482653949525036, -12.196067509614387, 10.731300686800832, 69.45813350545795],
"eval_len": [40, 41, 1000, 173, 50, 108, 1000, 85, 163, 313]}

 53%|█████▎    | 529999/1000000 [7:42:40<5:01:34, 25.97it/s]global step 530000, trans_decision ep_re 1.5509995514310808

{"global_step": 530000, "eval_re": [20.249802082157974, -3.939612314866046, 
-5.48734245491112, 16.899900903382683, 17.741482873435842, 1.3640066151704973, 
9.555423146934787, -32.01392510256252, 8.159655673039358, -17.019395907470646], 
"eval_len": [48, 44, 559, 34, 1000, 24, 1000, 1000, 32, 1000]}

 54%|█████▍    | 539999/1000000 [7:51:30<4:57:36, 25.76it/s]global step 540000, trans_decision ep_re -0.3185047754520081

{"global_step": 540000, "eval_re": [20.028577285412112, 21.430914296759738, 
-7.486261627147476, -8.65101986720468, 8.519399639954894, -26.520195059550954, 
-0.7752156088192328, 12.261264155674256, -41.14454013939751, 
19.152029169798773], "eval_len": [198, 31, 289, 74, 54, 48, 130, 12, 75, 50]}

 55%|█████▍    | 549997/1000000 [8:00:10<4:49:58, 25.86it/s]global step 550000, trans_decision ep_re -13.39308681382116

{"global_step": 550000, "eval_re": [-43.9365708768484, 0.02703342173708656, 
-58.77099011553723, 29.933040152925027, -57.38200990425629, 15.08316707103102, 
-13.732730395325593, -38.201035716409976, 20.885578032327327, 
12.163650192145479], "eval_len": [96, 143, 191, 121, 1000, 152, 45, 1000, 191, 
31]}

 56%|█████▌    | 559999/1000000 [8:08:50<4:45:17, 25.70it/s]global step 560000, trans_decision ep_re -5.521316733508891

{"global_step": 560000, "eval_re": [16.365082500694612, 8.677328141030284, 
-51.08220046527741, -17.459125466655646, 7.543404958208423, -30.952036133647653,
-2.0695704207513144, 65.77770459809008, 31.57519114390454, -83.58894619068482], 
"eval_len": [1000, 34, 1000, 194, 30, 1000, 1000, 269, 163, 1000]}

 57%|█████▋    | 569997/1000000 [8:17:50<4:39:16, 25.66it/s]global step 570000, trans_decision ep_re 4.454570963109822

{"global_step": 570000, "eval_re": [6.221795891620548, 8.889940544041842, 
-2.1608730966703686, -0.796042301052329, 12.457838801261286, 
-27.489993067320462, -11.513652689639645, 16.869449643191686, 
-4.682949291889608, 46.75019519755527], "eval_len": [32, 68, 39, 1000, 20, 106, 
46, 27, 29, 47]}

 58%|█████▊    | 579999/1000000 [8:26:30<4:32:18, 25.71it/s]global step 580000, trans_decision ep_re 2.046277591682216

{"global_step": 580000, "eval_re": [9.44937308505118, -43.007243296052536, 
-10.38391053915512, 8.701320499074008, -1.11953570841589, 9.509372925201221, 
24.280559399214702, 22.904522829010432, 17.430182558600123, -17.30186583570596],
"eval_len": [141, 128, 26, 99, 86, 62, 98, 51, 89, 341]}

 59%|█████▉    | 589999/1000000 [8:35:10<4:26:51, 25.61it/s]global step 590000, trans_decision ep_re -3.542062994085261

{"global_step": 590000, "eval_re": [-4.474041724582738, -27.440704653852023, 
-66.33736917106599, 38.84168485477688, 7.774040510678633, 14.478466988484382, 
33.58501691533676, -44.765783546537065, 19.29736967769667, -6.37930979178811], 
"eval_len": [112, 69, 1000, 117, 23, 37, 190, 1000, 52, 168]}

 60%|█████▉    | 599999/1000000 [8:44:00<4:19:20, 25.71it/s]global step 600000, trans_decision ep_re 11.50301191001601

{"global_step": 600000, "eval_re": [47.1168411756012, 3.391759507746747, 
49.14621951870106, 13.723021706103193, -1.3449163025011872, 4.675458722365886, 
-24.758702708338124, 2.4208825440938635, -14.726965056590053, 
35.38651999297753], "eval_len": [113, 39, 153, 1000, 84, 39, 92, 56, 130, 145]}

 61%|██████    | 609997/1000000 [8:52:40<4:11:12, 25.88it/s]global step 610000, trans_decision ep_re -10.796246269534228

{"global_step": 610000, "eval_re": [-7.487040464039414, 9.886276297336202, 
-22.80336661547138, -79.34191806707538, -0.03664727216608177, 
-60.295645868915756, -2.6474354181013444, 25.408800332702924, 
-2.044298332392981, 31.398812712780913], "eval_len": [101, 94, 220, 1000, 45, 
365, 23, 77, 1000, 32]}

 62%|██████▏   | 619999/1000000 [9:01:20<4:04:59, 25.85it/s]global step 620000, trans_decision ep_re -1.7991571125439823

{"global_step": 620000, "eval_re": [-19.74883484065092, 28.226516830223094, 
-8.631889357916506, -48.124588309609116, 0.32156994435902253, 
25.149858511813605, 30.942953065383264, -21.423409241916474, -6.634667575686223,
1.9309198485604326], "eval_len": [141, 44, 37, 173, 68, 23, 32, 272, 121, 30]}

 63%|██████▎   | 629997/1000000 [9:10:00<3:59:52, 25.71it/s]global step 630000, trans_decision ep_re 1.6061919955577153

{"global_step": 630000, "eval_re": [-2.369318227125656, 52.03512540851374, 
3.745451856928038, -0.018513488129806133, 4.003767325617941, -18.68192496595765,
-52.127774989484344, 10.356493243831055, 5.5311879252226905, 
13.587425866161137], "eval_len": [21, 252, 60, 20, 67, 38, 1000, 138, 32, 25]}

 64%|██████▍   | 639999/1000000 [9:18:50<3:53:14, 25.72it/s]global step 640000, trans_decision ep_re -0.7177078178768298

{"global_step": 640000, "eval_re": [29.904147839512106, -2.2932306822461213, 
-23.925339359932018, -38.75283908943035, 47.10452449863185, -2.03056487805856, 
24.25659868020376, 44.56773064579237, -7.790465489807235, -78.21764034343408], 
"eval_len": [93, 82, 1000, 248, 712, 35, 141, 212, 23, 396]}

 65%|██████▍   | 649999/1000000 [9:27:30<3:48:19, 25.55it/s]global step 650000, trans_decision ep_re -84.4834347733162

{"global_step": 650000, "eval_re": [13.732689834437362, -16.78320140349582, 
-11.01519932735102, -133.45936039250682, 1.135533722849962, 29.975657653485122, 
49.78144987410518, -601.8792389393323, -180.3951938940507, 4.072515138696884], 
"eval_len": [68, 216, 113, 1000, 20, 40, 117, 1000, 832, 157]}

 66%|██████▌   | 659997/1000000 [9:36:20<3:40:19, 25.72it/s]global step 660000, trans_decision ep_re -17.752083544166823

{"global_step": 660000, "eval_re": [25.6583149695612, -25.3773455652815, 
24.010883037644987, 31.801572595815436, 40.40032736262157, 35.31979475520904, 
5.910812420016513, -34.748831426146566, -267.2952531139708, 
-13.201110477138123], "eval_len": [1000, 1000, 190, 74, 93, 88, 34, 1000, 1000, 
228]}

 67%|██████▋   | 669999/1000000 [9:45:10<3:32:30, 25.88it/s]global step 670000, trans_decision ep_re -1.0086998583408509

{"global_step": 670000, "eval_re": [7.37692773700039, 24.24818868054216, 
20.88644455760592, -5.7241722060448526, -16.41160782187014, -11.296953423536534,
13.751078380392459, -0.6665787377856469, 2.874902770835927, -45.1252285205482], 
"eval_len": [24, 132, 18, 1000, 48, 1000, 43, 112, 275, 180]}

 68%|██████▊   | 679998/1000000 [9:54:00<3:26:09, 25.87it/s]global step 680000, trans_decision ep_re -2.648288687825386

{"global_step": 680000, "eval_re": [-12.359693713585138, 23.237014033916864, 
5.442538337916561, 2.6927761297205355, -8.908228011615922, -48.21801646314926, 
-11.068351176281045, 15.285698988631024, 6.784931908007818, 0.6284430881847098],
"eval_len": [201, 57, 1000, 227, 122, 1000, 33, 21, 36, 35]}

 69%|██████▉   | 689998/1000000 [10:02:40<3:17:53, 26.11it/s]global step 690000, trans_decision ep_re 4.524989290364458

{"global_step": 690000, "eval_re": [49.173171442763504, 49.65663011462305, 
-41.34448851434957, 16.078138349139635, -29.275431543985224, 6.839916928199575, 
1.25194856148627, 12.260035372117171, -5.213266000493382, -14.176761805856456], 
"eval_len": [177, 1000, 187, 53, 61, 29, 10, 133, 54, 63]}

 70%|██████▉   | 699999/1000000 [10:11:30<3:15:49, 25.53it/s]global step 700000, trans_decision ep_re -4.659468547220533

{"global_step": 700000, "eval_re": [32.062065726675435, 31.792287397942978, 
2.4887609403473077, -42.34001443707027, 4.9865211045312385, 21.61610457122243, 
-5.512296887180952, -32.145486121843746, 18.821052353412117, 
-78.36368012024187], "eval_len": [211, 132, 57, 210, 55, 60, 211, 96, 357, 
1000]}

 71%|███████   | 709997/1000000 [10:20:10<3:07:22, 25.80it/s]global step 710000, trans_decision ep_re -4.125561327686754

{"global_step": 710000, "eval_re": [-100.73336907784493, -127.77163974989625, 
-10.10160807356446, 29.495426291646275, 3.2354756659780435, -9.420468213512594, 
-15.31703669807584, -7.303620799598961, 148.89177040518896, 47.7694569728122], 
"eval_len": [1000, 412, 413, 55, 69, 117, 113, 73, 271, 117]}

 72%|███████▏  | 719997/1000000 [10:28:50<3:00:55, 25.79it/s]global step 720000, trans_decision ep_re -39.76260590308003

{"global_step": 720000, "eval_re": [-90.97161756444603, -42.33706354388582, 
-17.133669823048677, -12.751477658608133, -1.4822523419523164, 
-5.103719908101778, -129.6859073192791, -50.12921489637014, -35.326055358540955,
-12.705080616567388], "eval_len": [1000, 382, 1000, 122, 75, 176, 1000, 291, 
118, 40]}

 73%|███████▎  | 729999/1000000 [10:37:40<2:55:04, 25.70it/s]global step 730000, trans_decision ep_re -23.38384284242338

{"global_step": 730000, "eval_re": [3.1959240925186703, -96.21671540355541, 
-0.3016596951497905, 26.329229448740772, -17.11657344392439, 
-109.09244240548699, -8.80926379685457, -78.2565291553382, 41.120820863823035, 
5.3087810709930965], "eval_len": [25, 1000, 71, 164, 27, 1000, 25, 1000, 142, 
151]}

 74%|███████▍  | 739998/1000000 [10:46:30<2:45:04, 26.25it/s]global step 740000, trans_decision ep_re 13.7916055289319

{"global_step": 740000, "eval_re": [158.4511620089452, -30.908861855706075, 
11.892272974487387, 5.5143865260527125, -80.02770031730938, 19.07142130269411, 
-3.2892384310846166, 29.733485344791617, 5.944398703321098, 21.53472903312693], 
"eval_len": [665, 311, 34, 32, 1000, 39, 144, 276, 74, 185]}

 75%|███████▍  | 749999/1000000 [10:55:20<2:43:33, 25.47it/s]global step 750000, trans_decision ep_re -17.011313443412284

{"global_step": 750000, "eval_re": [13.13118767685168, -6.597253549941247, 
-70.66121393514558, -109.93667234141208, 13.866719884159572, 16.939089814248685,
-86.84837919008174, 41.05717132171299, 8.006989854475973, 10.929226031008923], 
"eval_len": [26, 98, 1000, 1000, 31, 62, 1000, 64, 31, 14]}

 76%|███████▌  | 759997/1000000 [11:04:00<2:36:45, 25.52it/s]global step 760000, trans_decision ep_re -23.67321648434489

{"global_step": 760000, "eval_re": [-119.65950872825383, 33.224826093538844, 
15.21094201231989, 4.728580021192622, -10.878176360567121, -0.4064298705376834, 
-172.6401209939432, 6.453655245445671, 8.235645629227614, -1.0015778918716658], 
"eval_len": [1000, 234, 176, 30, 21, 28, 400, 21, 82, 32]}

 77%|███████▋  | 769999/1000000 [11:12:32<2:29:16, 25.68it/s]global step 770000, trans_decision ep_re 2.55228833960039

{"global_step": 770000, "eval_re": [25.982884293060906, 3.6628261725635682, 
-3.5136732702240074, 0.0640405043003226, 22.191614018855205, 17.49179846544964, 
-33.13334516382695, 25.022651706386153, -33.28449657334855, 1.0385832427876114],
"eval_len": [116, 243, 20, 31, 109, 244, 162, 215, 36, 226]}

 78%|███████▊  | 779998/1000000 [11:21:30<2:21:24, 25.93it/s]global step 780000, trans_decision ep_re 13.904545314907086

{"global_step": 780000, "eval_re": [12.45290974903291, 16.48818605698895, 
39.07580372933622, 37.95094652900375, 24.331107866790394, 5.234930016152943, 
-10.61557315094531, 27.81784254264907, -32.68761386879842, 18.996913678860356], 
"eval_len": [43, 33, 106, 158, 42, 100, 218, 125, 315, 172]}

 79%|███████▉  | 789998/1000000 [11:30:10<2:13:12, 26.28it/s]global step 790000, trans_decision ep_re -17.373640604757107

{"global_step": 790000, "eval_re": [-7.506681405375359, -72.57835205024614, 
16.508582715331908, -56.79925856890119, 3.6781432920807804, -50.41563467153072, 
-66.86590897862278, 38.43001820210536, 13.106275909948916, 8.706409507638163], 
"eval_len": [71, 304, 137, 121, 30, 386, 1000, 95, 30, 290]}

 80%|███████▉  | 799999/1000000 [11:38:50<2:09:09, 25.81it/s]global step 800000, trans_decision ep_re -8.72698909636304

{"global_step": 800000, "eval_re": [0.976484412409788, -28.22229200692295, 
-14.189432506810224, -11.103310618282817, 29.163527743638202, 34.21898320428309,
-58.13557755137779, -17.105583859573184, 69.80364913664755, -92.67633891764207],
"eval_len": [32, 169, 33, 31, 61, 95, 1000, 1000, 575, 1000]}

 81%|████████  | 809999/1000000 [11:47:40<2:02:51, 25.78it/s]global step 810000, trans_decision ep_re -5.0694639637716365

{"global_step": 810000, "eval_re": [-28.675939103475432, 27.401200350732235, 
-19.914962861712173, -7.000714984384047, -117.74355316495947, 7.538092597853257,
31.299079594908687, -8.074785907407806, 28.811130021768967, 35.66581381895941], 
"eval_len": [141, 293, 412, 15, 230, 22, 254, 35, 170, 73]}

 82%|████████▏ | 819998/1000000 [11:56:20<1:54:39, 26.17it/s]global step 820000, trans_decision ep_re -19.219316416160385

{"global_step": 820000, "eval_re": [42.77622464342932, 72.46230616720975, 
-43.89387598750942, -152.88539832193402, -95.62964661744746, -51.61579976969407,
8.643010557663898, 12.132190161742672, -4.2997176655539775, 20.117542670489456],
"eval_len": [163, 295, 112, 1000, 1000, 1000, 36, 31, 86, 60]}

 83%|████████▎ | 829998/1000000 [12:05:10<1:48:19, 26.16it/s]global step 830000, trans_decision ep_re -16.417608696925164

{"global_step": 830000, "eval_re": [-13.116472490348688, -91.19038596673545, 
-3.015256146885673, -95.79711052061819, 10.314347649720883, 30.26510863087221, 
14.14532459061311, 5.361768745421308, -35.63664830457308, 14.493236843281938], 
"eval_len": [33, 1000, 35, 1000, 29, 329, 28, 44, 126, 29]}

 84%|████████▍ | 839999/1000000 [12:14:00<1:44:52, 25.43it/s]global step 840000, trans_decision ep_re -15.556596457835004

{"global_step": 840000, "eval_re": [11.750021886951673, -91.40484261424729, 
-30.46890931401174, 3.416213231219181, -28.990155440075803, 29.009277456894555, 
65.51734416654557, -1.3923787101069878, 52.65261919154724, -165.65515443306646],
"eval_len": [21, 1000, 40, 101, 1000, 34, 177, 55, 78, 1000]}

 85%|████████▍ | 849997/1000000 [12:22:40<1:37:55, 25.53it/s]global step 850000, trans_decision ep_re -26.418018573840744

{"global_step": 850000, "eval_re": [8.532575388276848, -67.26947663481569, 
-6.683488019895346, 36.30229877710524, -52.08469961375886, -155.79274283489028, 
4.05229211158724, 10.915323826229546, -83.54395516738266, 41.391686429136556], 
"eval_len": [37, 1000, 129, 83, 1000, 296, 58, 17, 1000, 137]}

 86%|████████▌ | 859999/1000000 [12:31:30<1:31:41, 25.45it/s]global step 860000, trans_decision ep_re 5.297976819888286

{"global_step": 860000, "eval_re": [-64.44017999112683, -32.27163976006383, 
39.214102912475866, 38.10241974888464, -35.612329157410464, 13.135259979723642, 
59.02713660745505, 19.016824547074695, 21.941376783340328, -5.133203471470223], 
"eval_len": [208, 224, 160, 118, 96, 71, 279, 167, 157, 131]}

 87%|████████▋ | 869998/1000000 [12:40:10<1:23:03, 26.09it/s]global step 870000, trans_decision ep_re -46.5027562925982

{"global_step": 870000, "eval_re": [-56.07480195847494, -28.156061784446933, 
75.69009100706059, -50.37259080590593, 3.5680589450376696, -142.96068457162613, 
20.87865715063368, 1.2339439422485645, -131.08573611391037, 
-157.74843873659827], "eval_len": [214, 228, 221, 165, 36, 1000, 101, 48, 1000, 
1000]}

 88%|████████▊ | 879998/1000000 [12:49:00<1:16:38, 26.09it/s]global step 880000, trans_decision ep_re -21.974879563710243

{"global_step": 880000, "eval_re": [1.6651276260837649, 4.205464093064542, 
9.591220133525523, -6.857608923662074, 52.13375662486565, 5.939163573880002, 
39.12738358634907, -82.0144299033755, -260.3848203789017, 16.845947931068267], 
"eval_len": [83, 83, 42, 34, 107, 31, 214, 385, 1000, 33]}

 89%|████████▉ | 889999/1000000 [12:57:50<1:11:29, 25.64it/s]global step 890000, trans_decision ep_re -52.95431902404484

{"global_step": 890000, "eval_re": [-14.965114899554088, -13.33407703690455, 
-58.028879061592264, 8.846825219377418, 28.867818718063337, -104.4493134591178, 
-154.06104425708241, -32.51068859484811, -92.87285970735151, 
-97.03585716143844], "eval_len": [107, 74, 1000, 68, 100, 320, 1000, 311, 1000, 
1000]}

 90%|████████▉ | 899998/1000000 [13:06:40<1:03:28, 26.26it/s]global step 900000, trans_decision ep_re -0.8540946576799489

{"global_step": 900000, "eval_re": [3.0898455316242255, 22.454109258484568, 
-26.98156685261251, -74.21235401423259, 5.869209760899381, -6.905646527210205, 
3.4101887846018055, 54.380187398235904, 6.940846597846602, 3.4142334855633396], 
"eval_len": [80, 234, 1000, 1000, 1000, 12, 46, 149, 138, 135]}

 91%|█████████ | 909999/1000000 [13:15:30<58:21, 25.70it/s]global step 910000, trans_decision ep_re -28.63889232621029

{"global_step": 910000, "eval_re": [9.790405060815573, 19.06373636210672, 
7.878544503285125, -34.28751210954592, -33.06531739910089, -144.91413483040412, 
-124.53682600442428, 8.505445599577541, -2.724000083779456, 7.9007356393668395],
"eval_len": [29, 32, 360, 111, 90, 1000, 1000, 19, 147, 19]}

 92%|█████████▏| 919999/1000000 [13:24:10<51:28, 25.90it/s]global step 920000, trans_decision ep_re 4.377026415562055

{"global_step": 920000, "eval_re": [2.211681190791417, 85.05301410146811, 
-50.4377784938004, 32.72134950146826, -33.34435538429797, -2.788476209845902, 
-44.736373848950606, 48.052398595195314, 5.32490013251304, 1.7139045710792928], 
"eval_len": [16, 343, 88, 165, 281, 412, 1000, 85, 384, 42]}

 93%|█████████▎| 929999/1000000 [13:33:00<44:59, 25.93it/s]global step 930000, trans_decision ep_re -46.67816818429092

{"global_step": 930000, "eval_re": [-15.35014433435775, -92.91046768834839, 
-15.076465662964752, -142.8361430725442, -65.78228364949004, 6.705350346476735, 
6.520016562619178, -130.72144471749417, 14.736052686819185, -32.06615231362498],
"eval_len": [102, 1000, 54, 738, 266, 82, 61, 1000, 19, 1000]}

 94%|█████████▍| 939998/1000000 [13:41:50<38:26, 26.01it/s]global step 940000, trans_decision ep_re -55.20485595916731

{"global_step": 940000, "eval_re": [-11.645767561314491, 6.2786973830308215, 
-86.08273639558442, -7.450052479770636, -46.8783345830611, -24.214362356658288, 
-75.64447273395432, 10.252298034483193, -321.05345976755785, 4.389630868714029],
"eval_len": [1000, 167, 1000, 67, 213, 1000, 1000, 62, 1000, 126]}

 95%|█████████▍| 949998/1000000 [13:50:40<32:03, 26.00it/s]global step 950000, trans_decision ep_re -4.5608085085124035

{"global_step": 950000, "eval_re": [-93.59946431152517, -2.876083879190497, 
15.299131614895598, -13.864225312122192, 22.072762392950025, -0.332925463708735,
56.98038319647709, 3.773144988626175, -33.59459831730528, 0.5337900057789408], 
"eval_len": [1000, 205, 83, 37, 71, 44, 220, 75, 84, 31]}

 96%|█████████▌| 959997/1000000 [13:59:20<25:37, 26.02it/s]global step 960000, trans_decision ep_re 3.145401428787328

{"global_step": 960000, "eval_re": [7.16596585325692, 11.176529978106814, 
-3.3628865779451598, -28.858116776428115, -6.921890726166683, 
29.017606330612534, 12.206103094741916, 0.4243683101565854, 0.4752805201107191, 
10.131054281427746], "eval_len": [130, 65, 18, 214, 266, 1000, 55, 28, 205, 
1000]}

 97%|█████████▋| 969997/1000000 [14:08:10<19:26, 25.73it/s]global step 970000, trans_decision ep_re -7.631900262851422

{"global_step": 970000, "eval_re": [-10.490392018298092, -32.34205320532456, 
8.790618225831896, -34.55513066496791, -44.74221418314155, 4.458295483328215, 
0.1031644198636541, 20.04243351486753, 4.314750461862977, 8.101525337463627], 
"eval_len": [100, 85, 138, 1000, 41, 14, 207, 20, 33, 46]}

 98%|█████████▊| 979999/1000000 [14:16:50<12:52, 25.90it/s]global step 980000, trans_decision ep_re 14.545120302606492

{"global_step": 980000, "eval_re": [32.066396225826445, -23.117790706301715, 
1.2425758667052096, -1.3652289651383327, 13.850332140366426, 16.621260259671658,
-10.034680702222442, 97.4866330625511, 8.715267913136035, 9.986437931470498], 
"eval_len": [125, 60, 140, 90, 41, 18, 1000, 215, 36, 55]}

 99%|█████████▉| 989999/1000000 [14:25:30<06:32, 25.48it/s]global step 990000, trans_decision ep_re -127.85283011886114

{"global_step": 990000, "eval_re": [48.15540562016823, 46.76466339955305, 
24.58007432626156, -6.901741884109043, -2.041276584971173, -650.6702408802865, 
-716.9041006557227, 6.002536860737889, -7.020633860103385, -20.492987530139423],
"eval_len": [71, 154, 136, 1000, 1000, 1000, 1000, 42, 98, 1000]}

100%|█████████▉| 999999/1000000 [14:34:30<00:00, 25.94it/s]global step 1000000, trans_decision ep_re -21.7667500622269

{"global_step": 1000000, "eval_re": [-62.8364085667586, 13.742599366824285, 
-220.11395227580456, 15.503772369163315, -77.31924672128682, 48.257614095225186,
29.548674304864097, 2.170627320831559, -4.958825977797932, 38.33764546247038], 
"eval_len": [1000, 19, 1000, 27, 1000, 210, 78, 183, 51, 1000]}

100%|██████████| 1000000/1000000 [14:34:40<00:00, 19.05it/s]
