
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.1
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9998/1000000 [03:24<8:35:55, 31.98it/s]global step 10000, trans_decision ep_re 58.52752366697448

{"global_step": 10000, "eval_re": [20.67753727298019, 63.439796527616245, 
33.183456146200726, 30.946737933933147, 21.027589637123953, 179.62947976390504, 
54.4880200775106, 71.8883824218619, 80.45851902146146, 29.53571786715161], 
"eval_len": [30, 81, 37, 36, 34, 163, 77, 88, 106, 44]}

  2%|▏         | 19999/1000000 [10:13<8:29:09, 32.08it/s]global step 20000, trans_decision ep_re 35.03322813610539

{"global_step": 20000, "eval_re": [6.483785769466903, 7.854707710182659, 
90.63299585481211, 12.832408459621934, 11.069625677324387, 12.904768535718842, 
11.741414949279632, 33.628992794498956, 150.24975113304774, 12.93383047710077], 
"eval_len": [20, 18, 207, 22, 21, 22, 23, 133, 124, 25]}

  3%|▎         | 29997/1000000 [17:02<8:26:41, 31.91it/s]global step 30000, trans_decision ep_re 96.59388973684513

{"global_step": 30000, "eval_re": [90.12400276250261, 11.979822284783912, 
14.352313557961953, 265.11463619501234, 17.693335671152887, 9.994872949811958, 
106.85101412562526, 11.759257761809696, 13.583516365112864, 424.4861256946779], 
"eval_len": [91, 23, 23, 205, 25, 20, 98, 23, 24, 321]}

  4%|▍         | 39999/1000000 [23:52<8:22:44, 31.83it/s]global step 40000, trans_decision ep_re 49.69010331149821

{"global_step": 40000, "eval_re": [105.2030999808011, 21.026888963483493, 
24.48037055310886, 14.145508774201307, 179.15744019771978, 9.685814064059892, 
8.042223726229405, 10.185479434522188, 109.81373758708688, 15.16046983376925], 
"eval_len": [88, 30, 41, 23, 112, 20, 21, 24, 90, 24]}

  5%|▍         | 49999/1000000 [30:41<8:22:39, 31.50it/s]global step 50000, trans_decision ep_re 67.4091436489247

{"global_step": 50000, "eval_re": [18.001276252606086, 258.8508783571319, 
9.532312055890227, 21.26366257285219, 166.0722114989318, 13.7795484488832, 
138.441642760555, 14.515293756808765, 12.687083363826241, 20.947527421761595], 
"eval_len": [25, 150, 20, 27, 118, 22, 124, 24, 23, 27]}

  6%|▌         | 59997/1000000 [37:37<8:24:16, 31.07it/s]global step 60000, trans_decision ep_re 58.13236366483234

{"global_step": 60000, "eval_re": [13.837633908293395, 10.776778978075317, 
253.82872059553213, 13.046447053978005, 14.38097529515925, 9.740543751919676, 
11.253274616476123, 236.03697135922783, 12.588234714315156, 5.834056375346441], 
"eval_len": [23, 21, 149, 25, 23, 24, 22, 138, 23, 21]}

  7%|▋         | 69999/1000000 [44:35<8:22:49, 30.83it/s]global step 70000, trans_decision ep_re 20.04516620160292

{"global_step": 70000, "eval_re": [12.812253680167439, 15.680109786151736, 
9.23636540101096, 22.84590770217771, 16.125863942012277, 13.253661340554, 
9.986444133680715, 14.195614966401411, 16.417915023216406, 69.89752604065659], 
"eval_len": [21, 24, 22, 30, 24, 23, 21, 22, 26, 63]}

  8%|▊         | 79997/1000000 [51:30<8:08:46, 31.37it/s]global step 80000, trans_decision ep_re 63.183478668520294

{"global_step": 80000, "eval_re": [14.852339227246025, 238.6774048419345, 
174.52262750863562, 7.318997444720227, 11.1681795804711, 10.940552259950483, 
11.741274527333095, 140.57612494936782, 10.41210566591196, 11.625180679632031], 
"eval_len": [23, 167, 148, 20, 21, 22, 23, 110, 22, 23]}

  9%|▉         | 89997/1000000 [58:23<7:58:19, 31.71it/s]global step 90000, trans_decision ep_re 22.152936318357284

{"global_step": 90000, "eval_re": [11.54729140730271, 6.053250833939628, 
8.37730367913812, 5.499921616589938, 11.753088014460866, 8.484082639469433, 
12.064966143775107, 133.72161825842295, 13.815065473994094, 10.212775116480014],
"eval_len": [22, 21, 20, 21, 22, 18, 22, 103, 23, 23]}

 10%|▉         | 99999/1000000 [1:05:16<7:55:58, 31.51it/s]global step 100000, trans_decision ep_re 68.2058277020056

{"global_step": 100000, "eval_re": [289.5013931503777, 15.372730078856838, 
8.343586803963685, 108.79612411570099, 9.191168334743315, 201.4290407844734, 
13.187987614022239, 11.669800469533302, 11.283998006230046, 13.282447662154555],
"eval_len": [172, 22, 19, 103, 19, 162, 21, 21, 21, 22]}

 11%|█         | 109999/1000000 [1:12:20<7:40:47, 32.19it/s]global step 110000, trans_decision ep_re 37.67163209920232

{"global_step": 110000, "eval_re": [15.322220011858537, 12.49153016436303, 
9.435583332019556, 15.333646065333525, 10.635557012248391, 16.097776190107258, 
5.077447618665581, 16.33255565916042, 82.49125878291723, 193.4987461553497], 
"eval_len": [24, 23, 21, 23, 22, 24, 16, 25, 99, 196]}

 12%|█▏        | 119999/1000000 [1:19:03<8:01:54, 30.43it/s]global step 120000, trans_decision ep_re 32.48136697345969

{"global_step": 120000, "eval_re": [213.4567417189562, 11.282481718820735, 
13.382482503730886, 7.660671760913178, 14.666146458305258, 14.108284830631185, 
12.037622330098863, 9.180244916059284, 20.186724148316912, 8.852269348764445], 
"eval_len": [150, 21, 22, 20, 24, 23, 23, 20, 26, 20]}

 13%|█▎        | 129997/1000000 [1:25:56<7:47:53, 30.99it/s]global step 130000, trans_decision ep_re 69.22798690157626

{"global_step": 130000, "eval_re": [12.345956253723925, 133.3523338874184, 
87.02904514207106, 13.167875679622174, 11.827453685664144, 168.1037907731661, 
83.88448252967686, 8.57409711018656, 156.31244101434362, 17.682392939889727], 
"eval_len": [23, 99, 96, 22, 22, 248, 68, 19, 104, 24]}

 14%|█▍        | 139999/1000000 [1:32:53<7:37:08, 31.35it/s]global step 140000, trans_decision ep_re 77.98678817047514

{"global_step": 140000, "eval_re": [36.02332602602819, 11.520304106219031, 
154.04874391073955, 12.987899705490008, 16.22613413284466, 15.035789042290627, 
17.06576925788065, 490.0173600574593, 12.529493655458083, 14.41306181034121], 
"eval_len": [43, 20, 172, 22, 23, 23, 24, 256, 21, 23]}

 15%|█▍        | 149997/1000000 [1:39:51<7:33:13, 31.26it/s]global step 150000, trans_decision ep_re 61.16251630259868

{"global_step": 150000, "eval_re": [18.7650973493239, 168.2347241953844, 
13.448522871014351, 19.60718519505931, 11.084723129365782, 17.314935416893913, 
13.573455240089636, 17.273621623478938, 320.07571755406957, 12.247180451306978],
"eval_len": [28, 112, 21, 27, 23, 25, 22, 25, 151, 22]}

 16%|█▌        | 159999/1000000 [1:46:48<7:29:32, 31.14it/s]global step 160000, trans_decision ep_re 88.16578182774072

{"global_step": 160000, "eval_re": [17.182046927696216, 13.940229418169368, 
11.516403562864783, 13.988304191387844, 10.976399732285016, 16.475816825242767, 
402.90271001813534, 23.62816666098399, 12.601934608961553, 358.4458063316802], 
"eval_len": [24, 23, 21, 23, 21, 24, 223, 27, 23, 212]}

 17%|█▋        | 169999/1000000 [1:53:43<7:30:11, 30.73it/s]global step 170000, trans_decision ep_re 114.328382269805

{"global_step": 170000, "eval_re": [11.301479617930768, 13.026082597649545, 
14.631814681094664, 139.57305660700845, 13.183224753892265, 72.65202992968358, 
172.54488851777904, 98.75169836519936, 223.45581639022726, 384.1637312375851], 
"eval_len": [21, 21, 23, 237, 22, 111, 140, 139, 192, 209]}

 18%|█▊        | 179997/1000000 [2:00:43<7:17:35, 31.23it/s]global step 180000, trans_decision ep_re 53.868589028380185

{"global_step": 180000, "eval_re": [16.947070272827514, 16.11905429040532, 
13.89121765470945, 11.651422758299383, 412.1780561329102, 13.755110560053856, 
16.943952186850606, 12.281632479131574, 15.659447589318637, 9.25892635929525], 
"eval_len": [23, 23, 23, 21, 197, 22, 24, 21, 25, 20]}

 19%|█▉        | 189999/1000000 [2:07:41<7:14:06, 31.10it/s]global step 190000, trans_decision ep_re 50.88880181976806

{"global_step": 190000, "eval_re": [13.096476386167447, 16.814754576621894, 
10.980571869122747, 15.924984968781452, 16.75029434057447, 377.43587374415404, 
12.447362431890276, 15.242604100311123, 14.331336495900182, 15.863759284156982],
"eval_len": [23, 24, 19, 23, 23, 285, 21, 22, 22, 23]}

 20%|█▉        | 199999/1000000 [2:14:50<7:10:09, 31.00it/s]global step 200000, trans_decision ep_re 169.9612477604205

{"global_step": 200000, "eval_re": [382.3999850339474, 15.346818591317433, 
13.665945409001806, 9.398056816252529, 8.851788861379235, 8.204755846054432, 
13.0833628166351, 14.261925453940428, 1228.681906558357, 5.717932217319516], 
"eval_len": [363, 23, 24, 20, 19, 19, 24, 25, 685, 17]}

 21%|██        | 209997/1000000 [2:21:40<7:06:27, 30.87it/s]global step 210000, trans_decision ep_re 408.0750309324331

{"global_step": 210000, "eval_re": [19.10908384513234, 14.798020599240672, 
664.1586826403599, 16.068548961097434, 756.5181154791474, 395.6422498718365, 
1401.383170461569, 18.072830879999582, 784.5384297728078, 10.461176813140499], 
"eval_len": [25, 23, 320, 23, 326, 167, 730, 25, 376, 20]}

 22%|██▏       | 219999/1000000 [2:28:41<7:06:09, 30.50it/s]global step 220000, trans_decision ep_re 211.193913517009

{"global_step": 220000, "eval_re": [15.707153578561401, 108.18520004695263, 
218.48651873973472, 993.3242216073992, 10.97947575687702, 727.7806559977925, 
14.42260709404138, 10.712348314004188, 6.397388268526571, 5.943565766200526], 
"eval_len": [24, 110, 173, 436, 20, 309, 23, 22, 18, 18]}

 23%|██▎       | 229997/1000000 [2:35:41<6:46:26, 31.58it/s]global step 230000, trans_decision ep_re 254.81213712525604

{"global_step": 230000, "eval_re": [10.916654363422166, 14.111340807998486, 
11.965852537238911, 16.95840160268713, 13.437087280733671, 10.876830209710759, 
11.950173120716055, 545.4514247761131, 1896.707329862386, 15.746276691554234], 
"eval_len": [20, 22, 22, 25, 21, 21, 22, 237, 848, 23]}

 24%|██▍       | 239997/1000000 [2:42:42<7:02:04, 30.01it/s]global step 240000, trans_decision ep_re 11.675163103349314

{"global_step": 240000, "eval_re": [13.796389823651419, 9.920956226720053, 
13.942576386094869, 11.780629530569385, 14.394113365404888, 11.30968890471284, 
15.75229327065429, 5.56426632431146, 4.713320558619338, 15.577396642754586], 
"eval_len": [22, 19, 23, 20, 23, 21, 24, 15, 18, 23]}

 25%|██▍       | 249997/1000000 [2:49:50<6:38:08, 31.40it/s]global step 250000, trans_decision ep_re 122.70149678251914

{"global_step": 250000, "eval_re": [8.222612144366314, 6.837065489018329, 
11.703277873641397, 755.8836111962097, 220.20113646091986, 13.822376287582081, 
14.860793496792702, 173.7625295462893, 10.084429822192686, 11.637135508179066], 
"eval_len": [20, 19, 21, 320, 115, 24, 25, 125, 22, 21]}

 26%|██▌       | 259999/1000000 [2:56:50<7:09:05, 28.74it/s]global step 260000, trans_decision ep_re 146.29677946736186

{"global_step": 260000, "eval_re": [13.390446182872749, 11.727740691076386, 
9.550405260007746, 467.311880217144, 447.78396341772833, 9.76993210839694, 
11.000589012010916, 468.581141585236, 10.293610017647698, 13.558086181497874], 
"eval_len": [23, 21, 20, 205, 238, 19, 21, 221, 20, 26]}

 27%|██▋       | 269997/1000000 [3:03:41<6:32:59, 30.96it/s]global step 270000, trans_decision ep_re 11.746881252286745

{"global_step": 270000, "eval_re": [14.405914113546089, 13.354094995558123, 
6.36790834015203, 10.829969026845955, 13.523018959470901, 10.794046044648317, 
14.201494162947744, 13.736740015467474, 8.65782778753668, 11.597799076694134], 
"eval_len": [23, 21, 17, 21, 21, 20, 23, 22, 21, 21]}

 28%|██▊       | 279999/1000000 [3:10:39<6:37:13, 30.21it/s]global step 280000, trans_decision ep_re 11.253125066728154

{"global_step": 280000, "eval_re": [13.106786216366878, 11.941134639018747, 
10.548088495072385, 7.578571552873184, 14.275363622251216, 14.491897506337494, 
7.1077636344923265, 9.784860520069904, 10.370172625325022, 13.32661185547439], 
"eval_len": [22, 21, 21, 18, 22, 22, 19, 20, 21, 22]}

 29%|██▉       | 289999/1000000 [3:17:34<6:17:52, 31.31it/s]global step 290000, trans_decision ep_re 12.369159787399921

{"global_step": 290000, "eval_re": [12.96795601973642, 13.142369360568189, 
12.072183319824862, 8.160314778469296, 14.47062207847899, 17.54385401468028, 
4.718939198439045, 14.309673386625013, 11.737107684265393, 14.56857803291172], 
"eval_len": [23, 21, 22, 19, 22, 23, 20, 24, 21, 23]}

 30%|██▉       | 299997/1000000 [3:24:50<6:16:56, 30.95it/s]global step 300000, trans_decision ep_re 923.765778525627

{"global_step": 300000, "eval_re": [1325.3169150839672, 13.80268949250591, 
14.340180823741887, 1204.3989509843461, 12.634958486462548, 15.833399268564277, 
8.796886245841876, 2336.9790653263462, 2536.545351091303, 1769.009388453191], 
"eval_len": [494, 23, 22, 441, 22, 24, 20, 754, 853, 619]}

 31%|███       | 309997/1000000 [3:31:37<6:04:36, 31.54it/s]global step 310000, trans_decision ep_re 255.39717387298683

{"global_step": 310000, "eval_re": [8.81961526550463, 935.57261919597, 
12.00478975231191, 12.8091698657508, 7.026749587834318, 152.2115725355309, 
1389.2768550282074, 15.912091475361192, 7.09391906796407, 13.24435695543338], 
"eval_len": [19, 311, 23, 22, 18, 114, 492, 25, 22, 22]}

 32%|███▏      | 319997/1000000 [3:38:50<6:10:36, 30.58it/s]global step 320000, trans_decision ep_re 13.55825871588996

{"global_step": 320000, "eval_re": [11.808836655993893, 13.112323020620066, 
9.372078432643168, 11.912870315229775, 13.95613500986343, 15.160769282767998, 
13.103778649343543, 14.598440251482376, 15.9763923245359, 16.580963216419466], 
"eval_len": [23, 23, 19, 22, 23, 23, 23, 23, 24, 25]}

 33%|███▎      | 329999/1000000 [3:45:42<6:04:08, 30.67it/s]global step 330000, trans_decision ep_re 303.1481180434327

{"global_step": 330000, "eval_re": [12.919633699257524, 7.618255373501429, 
2931.77120598187, 16.682120264666267, 6.792223201766446, 13.466296556259435, 
8.382627041404604, 5.416251833727724, 13.935488678847832, 14.497077803026164], 
"eval_len": [21, 18, 1000, 25, 17, 23, 20, 21, 22, 23]}

 34%|███▍      | 339997/1000000 [3:52:42<6:00:57, 30.47it/s]global step 340000, trans_decision ep_re 489.843716211826

{"global_step": 340000, "eval_re": [14.515596053404826, 2178.0152164389337, 
2237.3192117749327, 9.610447056860767, 13.065384845650284, 6.5007560669385995, 
11.52343366092252, 8.1972029782859, 407.3041642972476, 12.385748945083428], 
"eval_len": [23, 680, 672, 19, 21, 21, 21, 19, 198, 21]}

 35%|███▍      | 349999/1000000 [4:00:00<5:41:17, 31.74it/s]global step 350000, trans_decision ep_re 700.269443022149

{"global_step": 350000, "eval_re": [13.186711415009915, 2817.023960349847, 
7.214312557896996, 14.73100110754625, 1466.9720585060609, 113.66590820465649, 
1363.0847920248902, 1180.5848658062112, 14.429786028712972, 11.801034220656973],
"eval_len": [22, 868, 18, 24, 500, 71, 428, 407, 24, 23]}

 36%|███▌      | 359997/1000000 [4:07:00<5:49:55, 30.48it/s]global step 360000, trans_decision ep_re 837.8446510144164

{"global_step": 360000, "eval_re": [1593.8442806150142, 11.698585596713626, 
151.23975221158938, 13.767803738991923, 2092.2252709014742, 1071.5985330767696, 
525.6205421259206, 14.579111444561905, 1121.703274134822, 1782.1693562983066], 
"eval_len": [451, 21, 85, 23, 728, 379, 219, 23, 389, 574]}

 37%|███▋      | 369997/1000000 [4:14:10<5:35:12, 31.32it/s]global step 370000, trans_decision ep_re 908.3442596249988

{"global_step": 370000, "eval_re": [14.635971154274811, 1526.9935210781084, 
162.86564560794983, 3102.312907295581, 16.21803954584674, 911.0827372471297, 
15.527923949425293, 3304.790542283441, 13.917045799221023, 15.098262289009998], 
"eval_len": [25, 447, 104, 914, 25, 334, 24, 1000, 21, 25]}

 38%|███▊      | 379997/1000000 [4:21:10<5:31:47, 31.14it/s]global step 380000, trans_decision ep_re 1112.2063288292645

{"global_step": 380000, "eval_re": [11.666536669649702, 7.800077491823994, 
1759.5557766249206, 3007.735218583917, 2126.3150252556397, 862.1384443592967, 
9.392227017161728, 6.636262787401871, 916.2659101950505, 2414.557809307784], 
"eval_len": [22, 21, 512, 895, 606, 293, 19, 18, 346, 673]}

 39%|███▉      | 389997/1000000 [4:27:54<5:15:00, 32.27it/s]global step 390000, trans_decision ep_re 477.3798437014868

{"global_step": 390000, "eval_re": [12.088847394510681, 16.175830847512703, 
3062.860319043763, 14.147287169976998, 1611.1941855704376, 7.82933130483196, 
12.257138084798283, 13.036676346369118, 14.363813987534359, 9.845007265132434], 
"eval_len": [21, 24, 1000, 23, 654, 18, 21, 22, 22, 20]}

 40%|███▉      | 399999/1000000 [4:34:51<5:13:23, 31.91it/s]global step 400000, trans_decision ep_re 899.0872572839074

{"global_step": 400000, "eval_re": [12.00080050886812, 8.541636801938981, 
11.158389498333872, 2657.3667496522603, 139.1139685156797, 13.27876028988135, 
3355.816364801074, 2771.460483937277, 15.332904061107357, 6.802514772652454], 
"eval_len": [22, 21, 21, 802, 81, 23, 1000, 787, 23, 18]}

 41%|████      | 409997/1000000 [4:41:50<5:17:53, 30.93it/s]global step 410000, trans_decision ep_re 901.6897993116366

{"global_step": 410000, "eval_re": [245.83782134622052, 6.621437364579545, 
192.29949633533155, 11.03825289555456, 1179.658551805188, 3480.7560330380115, 
9.7268187486632, 2161.4382186677, 1715.5611328792245, 13.960230035892488], 
"eval_len": [150, 17, 116, 22, 355, 1000, 22, 606, 473, 23]}

 42%|████▏     | 419997/1000000 [4:48:51<5:08:30, 31.33it/s]global step 420000, trans_decision ep_re 681.1729771553983

{"global_step": 420000, "eval_re": [10.766991392021547, 8.238324799273633, 
3627.6914976853773, 14.949940068510037, 1804.4612991333024, 9.571233165957532, 
8.012490487756159, 241.43595991144846, 188.09497118605447, 898.5070637242807], 
"eval_len": [23, 19, 1000, 23, 521, 22, 20, 164, 119, 359]}

 43%|████▎     | 429999/1000000 [4:55:51<4:59:40, 31.70it/s]global step 430000, trans_decision ep_re 100.23970526750551

{"global_step": 430000, "eval_re": [11.12639917128737, 9.178639565448346, 
9.808875527119087, 50.80077218433355, 156.25354144220026, 8.078397069436235, 
308.1377221272021, 178.59552251931007, 87.38264494284205, 183.034538125876], 
"eval_len": [22, 22, 20, 123, 120, 18, 166, 124, 81, 107]}

 44%|████▍     | 439997/1000000 [5:02:45<5:00:25, 31.07it/s]global step 440000, trans_decision ep_re 28.08789950397528

{"global_step": 440000, "eval_re": [9.904215301667454, 12.896574403405621, 
106.20089832178738, 14.216616398762778, 12.616499307315406, 12.122179265956099, 
11.240828514375798, 15.684708040101777, 71.07225979650158, 14.92421568987893], 
"eval_len": [20, 24, 90, 23, 22, 23, 21, 23, 84, 23]}

 45%|████▍     | 449997/1000000 [5:09:40<4:48:13, 31.80it/s]global step 450000, trans_decision ep_re 12.235996173316217

{"global_step": 450000, "eval_re": [16.461182829281107, 10.307711973677703, 
13.811912192841902, 10.909466615890674, 15.36031037608614, 13.865337686522667, 
14.217801764015515, 11.229267815640387, 6.050106493981958, 10.146863985224114], 
"eval_len": [24, 20, 22, 21, 23, 22, 22, 21, 17, 20]}

 46%|████▌     | 459997/1000000 [5:16:32<4:51:55, 30.83it/s]global step 460000, trans_decision ep_re 361.39646460850173

{"global_step": 460000, "eval_re": [11.979678557224501, 104.72439480615901, 
9.267544744809937, 2779.464628191852, 277.9864246038589, 219.55538365820098, 
6.951940770673846, 182.34137184785223, 11.46006859688871, 10.233210307497384], 
"eval_len": [22, 90, 19, 754, 136, 110, 17, 98, 21, 22]}

 47%|████▋     | 469999/1000000 [5:23:40<4:42:15, 31.29it/s]global step 470000, trans_decision ep_re 471.62870108723627

{"global_step": 470000, "eval_re": [7.212708333118707, 139.71110315092935, 
139.39851283811183, 183.98674343063183, 11.255459969042604, 5.508940902704401, 
15.121066042618134, 12.119734596150444, 3955.819178464808, 246.1535631442476], 
"eval_len": [21, 112, 95, 122, 21, 17, 23, 22, 1000, 151]}

 48%|████▊     | 479997/1000000 [5:30:40<4:43:47, 30.54it/s]global step 480000, trans_decision ep_re 1059.686708785978

{"global_step": 480000, "eval_re": [5.677695839602088, 109.89785262984692, 
110.23258355904035, 2467.733220567983, 7.076176402608637, 13.619168388613717, 
158.6691045552897, 3787.947548269163, 3808.9969616590615, 127.01677598856928], 
"eval_len": [20, 60, 85, 614, 22, 22, 114, 1000, 1000, 89]}

 49%|████▉     | 489997/1000000 [5:37:33<4:33:16, 31.10it/s]global step 490000, trans_decision ep_re 633.3289830139757

{"global_step": 490000, "eval_re": [11.410432024963356, 11.974836115518386, 
197.58480698150262, 1398.3497467408558, 3669.8091879790827, 134.76630832132454, 
11.023229427584816, 88.9326819229692, 798.7430185639893, 10.695582061965657], 
"eval_len": [21, 21, 111, 399, 927, 116, 20, 85, 268, 21]}

 50%|████▉     | 499999/1000000 [5:44:35<4:32:29, 30.58it/s]global step 500000, trans_decision ep_re 263.24387079094174

{"global_step": 500000, "eval_re": [12.239970541089775, 15.678042547549433, 
252.64381533854308, 1690.8492963728918, 15.619296738854363, 7.951762334270701, 
9.181834891905812, 13.847800408482767, 604.6305315081647, 9.796357227664739], 
"eval_len": [21, 22, 136, 522, 24, 20, 20, 22, 209, 19]}

 51%|█████     | 509997/1000000 [5:51:34<4:18:48, 31.55it/s]global step 510000, trans_decision ep_re 581.2124912453587

{"global_step": 510000, "eval_re": [3.6795467207765338, 14.816869655719081, 
8.100002770957813, 13.916987528956295, 250.63122196154023, 6.5013106643746355, 
3949.1259055387472, 1410.8705080892541, 14.575746492040059, 139.90681303122167],
"eval_len": [16, 22, 21, 24, 146, 20, 1000, 442, 21, 89]}

 52%|█████▏    | 519999/1000000 [5:58:34<4:12:42, 31.66it/s]global step 520000, trans_decision ep_re 245.79669034466787

{"global_step": 520000, "eval_re": [142.27575627444048, 4.9625536954780625, 
95.3492880250181, 189.791208293437, 13.237908193674231, 14.611600886061346, 
397.1579627661447, 13.863012804715463, 125.86391232976649, 1460.8537001779432], 
"eval_len": [102, 21, 84, 101, 22, 24, 176, 23, 78, 390]}

 53%|█████▎    | 529997/1000000 [6:05:31<4:10:39, 31.25it/s]global step 530000, trans_decision ep_re 404.31843557909394

{"global_step": 530000, "eval_re": [13.909529373750237, 12.607062607495724, 
150.5803315621037, 1790.2013904767125, 2024.2287352982603, 13.13550655741164, 
9.715998782926109, 13.250570243742184, 5.033518699342791, 10.521712189193908], 
"eval_len": [23, 22, 124, 487, 528, 24, 20, 23, 17, 22]}

 54%|█████▍    | 539999/1000000 [6:12:30<4:10:46, 30.57it/s]global step 540000, trans_decision ep_re 734.9792388036277

{"global_step": 540000, "eval_re": [2175.57408342383, 4311.994022915385, 
57.23123283161771, 627.6696539031363, 12.5567572779837, 123.67109507662977, 
15.09402516247884, 5.722556932832579, 7.850246777591581, 12.428713734792195], 
"eval_len": [585, 1000, 70, 238, 23, 96, 23, 17, 18, 21]}

 55%|█████▍    | 549997/1000000 [6:19:32<4:00:26, 31.19it/s]global step 550000, trans_decision ep_re 502.23087342771703

{"global_step": 550000, "eval_re": [8.337415213917147, 13.790466084963715, 
778.2272433709251, 14.808990784830076, 10.661090869529218, 4153.317687443314, 
8.620253960831155, 11.478606075315941, 10.299325429218948, 12.76765504432571], 
"eval_len": [18, 25, 309, 23, 22, 1000, 19, 21, 20, 23]}

 56%|█████▌    | 559999/1000000 [6:26:32<3:55:36, 31.13it/s]global step 560000, trans_decision ep_re 716.4917796338715

{"global_step": 560000, "eval_re": [10.28906574349165, 4072.9080929348775, 
9.179044145051227, 13.753721475450478, 518.2792937140949, 14.727346666288575, 
110.34353347675965, 2276.248702619593, 125.94125281393775, 13.247742749169937], 
"eval_len": [20, 1000, 20, 24, 202, 24, 72, 593, 81, 22]}

 57%|█████▋    | 569997/1000000 [6:33:33<3:49:34, 31.22it/s]global step 570000, trans_decision ep_re 23.151052264912373

{"global_step": 570000, "eval_re": [12.240929220990656, 12.37792622298779, 
7.939923984647631, 7.592703119073124, 15.328942688256184, 5.625944056279847, 
9.124582944796948, 12.853417755231378, 134.42845852327076, 13.997694133589397], 
"eval_len": [21, 22, 19, 19, 23, 16, 21, 22, 94, 24]}

 58%|█████▊    | 579997/1000000 [6:40:40<3:44:45, 31.14it/s]global step 580000, trans_decision ep_re 216.82551717138267

{"global_step": 580000, "eval_re": [15.070387960126963, 13.197403942092789, 
14.451297981300861, 11.361955039078346, 1362.0899785708236, 6.955558847756679, 
14.162907655626041, 600.7088738953225, 115.71509239490369, 14.541715426795285], 
"eval_len": [23, 24, 22, 23, 366, 17, 23, 250, 87, 22]}

 59%|█████▉    | 589999/1000000 [6:47:40<3:41:33, 30.84it/s]global step 590000, trans_decision ep_re 720.1173227934448

{"global_step": 590000, "eval_re": [8.122689793404046, 3024.1510914951473, 
8.508237351263846, 8.712786508048413, 11.08217961823741, 4055.182014203184, 
7.899266458440081, 52.658075208230066, 11.987267523037758, 12.869619775456393], 
"eval_len": [23, 746, 19, 18, 21, 1000, 20, 79, 21, 22]}

 60%|█████▉    | 599997/1000000 [6:54:32<3:42:48, 29.92it/s]global step 600000, trans_decision ep_re 551.4670803744588

{"global_step": 600000, "eval_re": [12.780950831450474, 11.56748840166025, 
10.689674502518125, 20.8829834958144, 9.918082489505087, 14.608946728301776, 
1427.9509899567126, 3988.272151229262, 5.528700401955522, 12.470835707407458], 
"eval_len": [23, 21, 20, 35, 20, 22, 412, 947, 18, 21]}

 61%|██████    | 609999/1000000 [7:01:32<3:32:59, 30.52it/s]global step 610000, trans_decision ep_re 174.4756946744477

{"global_step": 610000, "eval_re": [749.9279696102382, 11.496087162267044, 
15.159058809796973, 14.468216364943475, 16.68825251866583, 7.874505193493673, 
12.498251331986692, 195.52116706204237, 706.7814540385059, 14.341984652536894], 
"eval_len": [283, 22, 22, 22, 25, 19, 22, 173, 304, 23]}

 62%|██████▏   | 619997/1000000 [7:08:40<3:23:16, 31.16it/s]global step 620000, trans_decision ep_re 693.828942126647

{"global_step": 620000, "eval_re": [13.610016944993676, 559.6027575833637, 
865.336915782357, 624.8759793413083, 10.285951379501176, 91.77989121414396, 
3284.7068201605744, 10.220129637933047, 1468.0258742212197, 9.845085001075432], 
"eval_len": [21, 213, 315, 247, 20, 114, 788, 20, 415, 21]}

 63%|██████▎   | 629997/1000000 [7:15:32<3:20:43, 30.72it/s]global step 630000, trans_decision ep_re 46.595358132802154

{"global_step": 630000, "eval_re": [12.842001879941227, 272.74975978945054, 
13.153380490753724, 11.198844892288097, 12.532256001646974, 11.437353320390377, 
9.851633981971991, 94.15419639107076, 15.576138503109975, 12.458016077397916], 
"eval_len": [22, 147, 22, 25, 23, 22, 19, 115, 23, 21]}

 64%|██████▍   | 639997/1000000 [7:22:27<3:11:59, 31.25it/s]global step 640000, trans_decision ep_re 23.2624607725929

{"global_step": 640000, "eval_re": [10.851661814167434, 132.21684484773206, 
12.31155354053954, 14.878932883161477, 11.609929994122398, 12.020167533225015, 
7.233451465776636, 9.754581302788257, 8.41299091064884, 13.33449343376733], 
"eval_len": [20, 94, 22, 23, 22, 23, 21, 21, 19, 23]}

 65%|██████▍   | 649997/1000000 [7:29:26<3:10:02, 30.70it/s]global step 650000, trans_decision ep_re 59.13440222355537

{"global_step": 650000, "eval_re": [9.917502068651265, 9.66757669990343, 
12.10899188021964, 217.85563116383943, 10.178237416646958, 8.170615183016992, 
11.535961208866553, 9.101710388143141, 290.4062463232597, 12.401549903006492], 
"eval_len": [23, 20, 21, 292, 20, 19, 23, 20, 138, 23]}

 66%|██████▌   | 659997/1000000 [7:36:40<3:01:34, 31.21it/s]global step 660000, trans_decision ep_re 764.9277960574751

{"global_step": 660000, "eval_re": [4119.367890982861, 13.29933875835638, 
6.670112367202499, 12.686798554311073, 6.5117577073479636, 13.08124963024156, 
134.3065737648733, 3186.393625059387, 144.983592849343, 11.977020900827805], 
"eval_len": [980, 23, 21, 22, 19, 21, 182, 741, 102, 21]}

 67%|██████▋   | 669997/1000000 [7:43:26<2:53:25, 31.71it/s]global step 670000, trans_decision ep_re 263.204831687235

{"global_step": 670000, "eval_re": [136.8224122979674, 8.629494126341918, 
12.43527590014186, 9.451442985322425, 209.0637871587873, 144.0919244221742, 
9.546052831485888, 139.9867039667653, 11.878391736124525, 1950.1428314472391], 
"eval_len": [95, 20, 22, 21, 99, 79, 22, 97, 23, 477]}

 68%|██████▊   | 679999/1000000 [7:50:24<2:50:59, 31.19it/s]global step 680000, trans_decision ep_re 443.85818909936427

{"global_step": 680000, "eval_re": [1296.3385292324122, 161.96195643190217, 
12.84977889619841, 13.199690744996722, 878.84791352363, 2033.3892108814607, 
12.365063144142189, 10.81964685865324, 10.889005513255315, 7.921095766991852], 
"eval_len": [401, 99, 22, 24, 305, 562, 24, 22, 20, 20]}

 69%|██████▉   | 689997/1000000 [7:57:23<2:45:33, 31.21it/s]global step 690000, trans_decision ep_re 176.1791461841145

{"global_step": 690000, "eval_re": [13.41027441685885, 9.571557833949562, 
1429.420273113475, 7.496653218952517, 9.482299046531303, 10.634735402867847, 
12.998620103302864, 12.875114190158135, 153.4478585593264, 102.4540759557225], 
"eval_len": [22, 21, 430, 21, 22, 23, 23, 22, 91, 103]}

 70%|██████▉   | 699999/1000000 [8:04:40<2:40:51, 31.08it/s]global step 700000, trans_decision ep_re 1157.377873897982

{"global_step": 700000, "eval_re": [3287.5696536642013, 9.108484655775184, 
2346.855960946051, 11.753115975516943, 8.227657254014375, 10.03572095242211, 
12.083773252093575, 4039.9089564612095, 1212.4484674149498, 635.7869484035855], 
"eval_len": [755, 20, 599, 21, 19, 20, 21, 888, 347, 268]}

 71%|███████   | 709997/1000000 [8:11:40<2:35:28, 31.09it/s]global step 710000, trans_decision ep_re 1416.4502272284055

{"global_step": 710000, "eval_re": [4542.663986378533, 8.623386916429913, 
1690.8935857023423, 4477.850783045767, 7.1165133559712705, 10.734833126547752, 
159.1532398953068, 3250.7230731172685, 11.02532581614655, 5.717544929740934], 
"eval_len": [1000, 20, 448, 1000, 22, 21, 90, 762, 21, 21]}

 72%|███████▏  | 719997/1000000 [8:18:26<2:32:27, 30.61it/s]global step 720000, trans_decision ep_re 190.05353005621419

{"global_step": 720000, "eval_re": [1420.393211080871, 78.01643303224779, 
6.7035312575937915, 8.652209567838558, 9.557192884564415, 15.077817337873336, 
217.0906502682797, 11.67601441846892, 124.82273764512432, 8.545503069280215], 
"eval_len": [391, 96, 17, 21, 22, 23, 132, 21, 76, 19]}

 73%|███████▎  | 729999/1000000 [8:25:40<2:25:02, 31.03it/s]global step 730000, trans_decision ep_re 1211.116196609581

{"global_step": 730000, "eval_re": [8.751878316777992, 7.942172387483199, 
3746.026100387564, 10.206188418932967, 152.30602170086897, 145.67807351149128, 
1791.5365565589568, 4212.665266378161, 523.1103493967628, 1512.9393590388115], 
"eval_len": [20, 19, 846, 24, 103, 114, 484, 1000, 207, 441]}

 74%|███████▍  | 739997/1000000 [8:32:22<2:18:53, 31.20it/s]global step 740000, trans_decision ep_re 415.1346234223089

{"global_step": 740000, "eval_re": [8.186761890038696, 8.69478097392973, 
2691.08917944383, 255.93055521995072, 1145.0981028797557, 9.10130918008467, 
6.86719114566418, 9.31813587973042, 5.775814151290156, 11.284403458814351], 
"eval_len": [20, 21, 649, 144, 343, 20, 17, 22, 22, 22]}

 75%|███████▍  | 749999/1000000 [8:39:30<2:13:58, 31.10it/s]global step 750000, trans_decision ep_re 1390.7252283071568

{"global_step": 750000, "eval_re": [1347.4697616696596, 4070.277845717934, 
2944.9711112054792, 12.430639451451661, 10.534475761905647, 121.0143168418668, 
2928.60632655747, 10.278221431471513, 168.99555962860927, 2292.674024805722], 
"eval_len": [413, 1000, 711, 22, 21, 82, 691, 20, 101, 609]}

 76%|███████▌  | 759997/1000000 [8:46:30<2:08:47, 31.06it/s]global step 760000, trans_decision ep_re 10.70588487167096

{"global_step": 760000, "eval_re": [11.66272362384013, 9.324201416054786, 
7.68149385128109, 9.535395072391125, 12.654217585475616, 13.798955771490094, 
10.571262391602612, 13.291179150774981, 8.829616809947305, 9.709803043851862], 
"eval_len": [21, 21, 19, 22, 24, 23, 22, 23, 21, 19]}

 77%|███████▋  | 769999/1000000 [8:53:30<2:04:27, 30.80it/s]global step 770000, trans_decision ep_re 1288.1753765247101

{"global_step": 770000, "eval_re": [12.169982725565296, 10.779558799866303, 
7.829601126126272, 667.1317393706311, 6.532291182958422, 4714.148808788093, 
3049.8668456759297, 14.8269386218656, 4388.149720724993, 10.318278231074038], 
"eval_len": [23, 21, 23, 256, 19, 1000, 726, 23, 1000, 21]}

 78%|███████▊  | 779997/1000000 [9:00:13<1:57:25, 31.23it/s]global step 780000, trans_decision ep_re 150.09984335973954

{"global_step": 780000, "eval_re": [1402.5501798529922, 10.744178069027681, 
12.440364813831824, 12.565353683148611, 9.666080804480277, 13.113200478684838, 
6.653401751058332, 11.911529519204226, 8.963812214974693, 12.39033240999265], 
"eval_len": [361, 22, 23, 23, 22, 24, 18, 22, 19, 21]}

 79%|███████▉  | 789999/1000000 [9:07:06<1:49:18, 32.02it/s]global step 790000, trans_decision ep_re 2.6019524640079874

{"global_step": 790000, "eval_re": [1.48731240938994, 7.38851789569673, 
1.312031061123147, 2.147791958238357, 0.03444824653539014, 3.7827624786617675, 
-0.12913525061635323, 2.1924020980069345, 7.984347430642444, 
-0.1809536875984833], "eval_len": [34, 19, 35, 32, 31, 32, 30, 31, 32, 34]}

 80%|███████▉  | 799999/1000000 [9:14:10<1:47:22, 31.04it/s]global step 800000, trans_decision ep_re 119.84189488522695

{"global_step": 800000, "eval_re": [13.579345845412494, 14.5213692999318, 
10.06147390747119, 15.754213294320767, 10.339926128651971, 1086.1849289929748, 
17.422799902816543, 12.004482045249587, 5.817917726611013, 12.732491708829281], 
"eval_len": [22, 23, 21, 24, 20, 426, 26, 22, 18, 22]}

 81%|████████  | 809997/1000000 [9:20:53<1:41:34, 31.17it/s]global step 810000, trans_decision ep_re 241.53120006244666

{"global_step": 810000, "eval_re": [10.640765241918375, 12.485467563280661, 
2301.3958425323954, 13.568245649779042, 11.082496024796706, 11.9861864219524, 
13.614317980414219, 15.475524313462964, 15.775792245485665, 9.287362650981205], 
"eval_len": [20, 21, 556, 22, 20, 21, 23, 24, 23, 20]}

 82%|████████▏ | 819999/1000000 [9:27:47<1:34:41, 31.68it/s]global step 820000, trans_decision ep_re 55.95615463503886

{"global_step": 820000, "eval_re": [11.91008444978068, 12.170131309443443, 
137.5015591317765, 13.04189511533968, 15.271529982147445, 132.88034335841837, 
209.6783796849723, 13.560252275295335, 3.3980780563447817, 10.14929298687], 
"eval_len": [22, 21, 111, 22, 23, 108, 120, 23, 21, 19]}

 83%|████████▎ | 829999/1000000 [9:34:41<1:30:23, 31.35it/s]global step 830000, trans_decision ep_re 91.03242700136835

{"global_step": 830000, "eval_re": [13.431958007153206, 126.76764386073306, 
116.93928119933838, 9.130358585789939, 10.178223481911381, 10.25059793203298, 
8.217900347593309, 130.34719206824658, 8.062923593303173, 476.9981909375814], 
"eval_len": [24, 78, 78, 21, 22, 23, 22, 75, 19, 192]}

 84%|████████▍ | 839999/1000000 [9:41:33<1:23:30, 31.93it/s]global step 840000, trans_decision ep_re 254.64452147780602

{"global_step": 840000, "eval_re": [3.5162255742022683, 8.832213871016135, 
10.363026628291992, 2170.4507026530136, 12.598346535725895, 11.278253947693013, 
11.855227193593846, 12.595818797947615, 172.3410300418935, 132.61436953468183], 
"eval_len": [17, 19, 21, 550, 22, 21, 22, 22, 96, 81]}

 85%|████████▍ | 849997/1000000 [9:48:40<1:20:28, 31.07it/s]global step 850000, trans_decision ep_re 1270.4289560927073

{"global_step": 850000, "eval_re": [13.394507976383794, 10.989637323759162, 
14.411452312468178, 4711.073623325411, 106.86858239170869, 1477.0782242801645, 
4968.739037653947, 7.012909284381804, 1386.4796726074517, 8.241913771397575], 
"eval_len": [22, 21, 23, 1000, 72, 398, 1000, 19, 411, 19]}

 86%|████████▌ | 859997/1000000 [9:55:31<1:14:13, 31.43it/s]global step 860000, trans_decision ep_re 65.94142995928765

{"global_step": 860000, "eval_re": [12.29446917656918, 14.1824414420488, 
133.62113205511562, 13.191161331585267, 7.975853178447027, 8.89827274394725, 
279.95322918563926, 164.61770332760122, 14.660953567269415, 10.019083584653393],
"eval_len": [22, 23, 82, 23, 19, 21, 122, 104, 24, 20]}

 87%|████████▋ | 869997/1000000 [10:02:23<1:08:30, 31.63it/s]global step 870000, trans_decision ep_re 101.63427681772092

{"global_step": 870000, "eval_re": [11.77761740042384, 15.482493267670247, 
8.659985557089904, 12.455186952805127, 8.797225269110154, 11.965378890167708, 
7.860873736874066, 8.862890098037091, 922.5106763596464, 7.970440645384614], 
"eval_len": [23, 24, 19, 22, 20, 22, 19, 21, 269, 21]}

 88%|████████▊ | 879997/1000000 [10:09:30<1:03:30, 31.49it/s]global step 880000, trans_decision ep_re 531.3012802776803

{"global_step": 880000, "eval_re": [11.454535509256107, 1443.6544718681828, 
382.89891260367887, 133.15814831477476, 76.48835852947252, 416.0721858487884, 
7.150552304070366, 11.32217658794454, 2822.4017687398377, 8.41169247079661], 
"eval_len": [20, 385, 167, 127, 64, 204, 19, 23, 624, 20]}

 89%|████████▉ | 889997/1000000 [10:16:30<58:17, 31.45it/s]global step 890000, trans_decision ep_re 1005.3564228807879

{"global_step": 890000, "eval_re": [117.00415554253628, 4359.962789575657, 
10.966593229998711, 10.896292475300228, 13.63754448038577, 1878.8436144545421, 
10.639471623194995, 224.21024540569766, 12.021294365663545, 3415.382227654904], 
"eval_len": [112, 934, 22, 22, 23, 462, 22, 117, 21, 797]}

 90%|████████▉ | 899997/1000000 [10:23:15<52:05, 32.00it/s]global step 900000, trans_decision ep_re 218.25482006495326

{"global_step": 900000, "eval_re": [8.663834570890065, 819.4613617909176, 
11.458861025365433, 5.753439273532251, 11.98697912835698, 11.114580346799642, 
75.46979759519849, 1218.2972792001995, 8.276748431361513, 12.065319286910878], 
"eval_len": [19, 368, 20, 18, 22, 22, 89, 350, 19, 21]}

 91%|█████████ | 909999/1000000 [10:30:14<48:01, 31.24it/s]global step 910000, trans_decision ep_re 533.4418931033917

{"global_step": 910000, "eval_re": [11.46748784708273, 1751.6905212656793, 
2018.6096582180626, 98.75199443432304, 10.872346470147617, 9.994356424646151, 
178.1561151098236, 8.396041614091958, 10.394393349529238, 1236.0860163005304], 
"eval_len": [22, 459, 502, 95, 23, 21, 110, 21, 23, 334]}

 92%|█████████▏| 919997/1000000 [10:37:30<43:57, 30.33it/s]global step 920000, trans_decision ep_re 753.9261597752959

{"global_step": 920000, "eval_re": [2091.869464771824, 3589.378402973986, 
7.136588528551194, 8.784232027852358, 9.496126745745494, 17.879956936792347, 
1597.643246776022, 190.73890298356542, 11.421845203342343, 14.912830805279683], 
"eval_len": [582, 854, 18, 20, 20, 23, 412, 122, 23, 22]}

 93%|█████████▎| 929997/1000000 [10:44:30<37:03, 31.48it/s]global step 930000, trans_decision ep_re 992.3482622660307

{"global_step": 930000, "eval_re": [11.450886358581077, 1862.1561942595547, 
8.17789308407433, 214.5654656235514, 12.370310204048646, 11.95758555676685, 
3401.0584189079414, 12.101375964761184, 10.851546007474447, 4378.792946693554], 
"eval_len": [21, 470, 21, 117, 21, 24, 779, 22, 22, 931]}

 94%|█████████▍| 939997/1000000 [10:51:30<32:21, 30.91it/s]global step 940000, trans_decision ep_re 449.54853411628574

{"global_step": 940000, "eval_re": [6.835404039889325, 9.058827377810575, 
180.65828706571594, 6.806872534447745, 3360.5728015397626, 15.48575360232402, 
13.22669020350801, 756.0667385535459, 137.917581431662, 8.856384814191475], 
"eval_len": [19, 22, 96, 18, 730, 23, 21, 252, 89, 19]}

 95%|█████████▍| 949997/1000000 [10:58:21<26:48, 31.09it/s]global step 950000, trans_decision ep_re 383.21960117498696

{"global_step": 950000, "eval_re": [6.466656320937715, 142.65164112910384, 
13.407167479407304, 8.779746952560398, 1509.6991344514897, 262.77788228978915, 
1393.0783530890815, 9.896718627817382, 475.1992112085631, 10.23950020111955], 
"eval_len": [18, 95, 21, 22, 372, 138, 365, 20, 179, 22]}

 96%|█████████▌| 959999/1000000 [11:05:23<21:31, 30.98it/s]global step 960000, trans_decision ep_re 43.63634947971085

{"global_step": 960000, "eval_re": [11.687359481244183, 11.29176245593425, 
13.63625909393487, 183.71686415436997, 10.260303576158146, 11.899806945400684, 
156.00443752048068, 10.951585115146779, 13.635374146697124, 13.279742307741813],
"eval_len": [23, 22, 23, 121, 20, 22, 102, 22, 23, 22]}

 97%|█████████▋| 969999/1000000 [11:12:30<16:11, 30.88it/s]global step 970000, trans_decision ep_re 389.00866863845135

{"global_step": 970000, "eval_re": [1319.55583389195, 1348.6321495138843, 
10.376127028574007, 14.827863697147823, 162.98088003651588, 837.596486437813, 
8.996861940105498, 6.479531171778708, 13.423588624075313, 167.21736404266875], 
"eval_len": [349, 352, 20, 22, 113, 268, 18, 21, 23, 119]}

 98%|█████████▊| 979997/1000000 [11:19:20<10:44, 31.05it/s]global step 980000, trans_decision ep_re 562.2721125584853

{"global_step": 980000, "eval_re": [10.60091472872667, 169.55746060485322, 
285.35260185289934, 15.475218424059952, 1875.2946555307233, 12.965654525170118, 
115.37578797784337, 809.2164449585457, 11.359507480439861, 2317.522879501592], 
"eval_len": [22, 102, 170, 22, 459, 21, 82, 249, 24, 511]}

 99%|█████████▉| 989999/1000000 [11:26:24<05:23, 30.88it/s]global step 990000, trans_decision ep_re 433.5505692564408

{"global_step": 990000, "eval_re": [38.22431230789542, 8.98793709641889, 
11.483278249732084, 12.474308156109549, 4.801781252871411, 4193.576319994657, 
10.601799260588226, 5.883626478375534, 41.37523430814972, 8.097095459610705], 
"eval_len": [57, 18, 24, 23, 15, 971, 22, 19, 56, 19]}

100%|█████████▉| 999997/1000000 [11:33:25<00:00, 31.37it/s]global step 1000000, trans_decision ep_re 266.655572197021

{"global_step": 1000000, "eval_re": [1195.6577313138835, 167.4210668154402, 
336.60390782829694, 8.213557878988174, 13.492159997865395, 10.07740859426712, 
7.879114252867385, 7.846501617758841, 127.11747830114078, 792.2467953697018], 
"eval_len": [351, 103, 159, 20, 23, 23, 21, 21, 109, 259]}

100%|██████████| 1000000/1000000 [11:33:38<00:00, 24.03it/s]
