
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.05
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9997/1000000 [03:22<8:30:42, 32.31it/s]global step 10000, trans_decision ep_re 100.18630085673486

{"global_step": 10000, "eval_re": [186.75779443274482, 28.416346872975765, 
26.901020061540873, 26.279950624194583, 200.25428903077992, 36.50432589417012, 
214.87586554887255, 73.6770754225554, 28.943067937183358, 179.25327274233118], 
"eval_len": [106, 34, 31, 32, 110, 38, 119, 182, 33, 97]}

  2%|▏         | 19999/1000000 [10:20<8:29:09, 32.08it/s]global step 20000, trans_decision ep_re 144.69478766571405

{"global_step": 20000, "eval_re": [12.962855730894876, 226.88436117670017, 
92.02960191791063, 207.20587685701346, 263.3231878165412, 196.40128155661696, 
13.850458101166886, 189.67660732397667, 15.709814386308837, 228.90383179001063],
"eval_len": [22, 124, 70, 126, 167, 91, 24, 97, 25, 136]}

  3%|▎         | 29997/1000000 [16:54<8:23:22, 32.12it/s]global step 30000, trans_decision ep_re 198.42192994247722

{"global_step": 30000, "eval_re": [220.79143027182434, 331.10141417569577, 
261.6350161458865, 238.06799056672403, 211.4518697856577, 13.518627011869125, 
226.53359380357003, 14.019551670686303, 275.8104637688331, 191.28934222402566], 
"eval_len": [128, 148, 174, 124, 152, 26, 116, 26, 137, 105]}

  4%|▍         | 39996/1000000 [23:40<8:16:49, 32.20it/s]global step 40000, trans_decision ep_re 157.55845717336294

{"global_step": 40000, "eval_re": [157.7561033871693, 160.74978464057273, 
128.19245416441615, 270.22227535738324, 163.56836670605057, 150.2035162748882, 
157.87270871236427, 107.45698867640857, 142.81029594657076, 136.75207786780587],
"eval_len": [97, 96, 91, 112, 84, 87, 75, 87, 85, 90]}

  5%|▍         | 49997/1000000 [30:25<8:14:24, 32.02it/s]global step 50000, trans_decision ep_re 106.78468052797658

{"global_step": 50000, "eval_re": [218.55892276930345, 14.757108202866377, 
17.869196762656824, 16.853769735654744, 239.9956026471447, 26.180838002194832, 
474.21586602680253, 19.85722500335348, 15.327071677649604, 24.231204452139057], 
"eval_len": [100, 23, 25, 26, 224, 32, 323, 28, 23, 33]}

  6%|▌         | 59999/1000000 [37:11<8:04:36, 32.33it/s]global step 60000, trans_decision ep_re 165.7094308887884

{"global_step": 60000, "eval_re": [21.59714987930638, 185.58430975212593, 
377.67474040657476, 257.6941072073404, 311.1921566848128, 170.24561929264308, 
17.301195659194107, 119.40283852499671, 16.692287634240312, 179.7099038466494], 
"eval_len": [34, 113, 223, 146, 152, 92, 30, 80, 30, 113]}

  7%|▋         | 69997/1000000 [44:10<8:16:32, 31.22it/s]global step 70000, trans_decision ep_re 74.36637745448346

{"global_step": 70000, "eval_re": [19.471998557076024, 23.956791978173914, 
22.39925327482824, 168.0178264227225, 22.39418972255003, 124.35464512659028, 
18.880635103460932, 152.57072386523868, 171.89732469349238, 19.720385800701745],
"eval_len": [25, 30, 29, 104, 29, 137, 26, 169, 109, 28]}

  8%|▊         | 79997/1000000 [50:47<8:01:03, 31.87it/s]global step 80000, trans_decision ep_re 115.85996204532769

{"global_step": 80000, "eval_re": [20.6232581679336, 144.4230694886185, 
500.77927223385717, 18.735818237702826, 15.074974176737266, 13.211587292257555, 
13.014888169736498, 21.057301118558588, 394.8967944177162, 16.782657150158666], 
"eval_len": [26, 216, 285, 26, 24, 22, 22, 28, 215, 24]}

  9%|▉         | 89999/1000000 [57:33<7:51:17, 32.18it/s]global step 90000, trans_decision ep_re 45.42000326814478

{"global_step": 90000, "eval_re": [12.614353731634985, 20.591562097989247, 
19.512646289392368, 16.81476357524362, 20.005974160045675, 16.640854507230138, 
19.997329739084957, 58.97019371463179, 12.499701113929884, 256.5526537522652], 
"eval_len": [22, 27, 25, 25, 27, 23, 26, 119, 22, 300]}

 10%|▉         | 99998/1000000 [1:04:30<7:52:33, 31.74it/s]global step 100000, trans_decision ep_re 111.21034745112918

{"global_step": 100000, "eval_re": [103.93666646908923, 115.25776137498214, 
18.85458913287004, 53.1338855685759, 476.54831266340256, 57.94686230874535, 
17.635036714209424, 19.12092378117608, 108.99184300749717, 140.67759349074385], 
"eval_len": [86, 115, 27, 50, 229, 122, 26, 26, 129, 162]}

 11%|█         | 109997/1000000 [1:11:05<7:43:51, 31.98it/s]global step 110000, trans_decision ep_re 76.23413041603038

{"global_step": 110000, "eval_re": [316.4705474447997, 17.080177824854275, 
17.51351514021732, 13.619486013190638, 17.590698659094237, 165.57693424190703, 
18.869245988134153, 17.99287445745474, 18.692747217660646, 158.93507717299102], 
"eval_len": [173, 24, 24, 22, 26, 112, 25, 27, 25, 139]}

 12%|█▏        | 119997/1000000 [1:17:51<7:38:00, 32.02it/s]global step 120000, trans_decision ep_re 48.22218721554616

{"global_step": 120000, "eval_re": [19.36569045924329, 13.580518203899876, 
12.994351949106635, 18.283687670978342, 84.71131617068349, 14.720297055758445, 
16.890840527749944, 13.312701327517573, 275.2622199613952, 13.100248829128684], 
"eval_len": [27, 23, 22, 24, 68, 24, 25, 22, 160, 22]}

 13%|█▎        | 129999/1000000 [1:24:36<7:33:06, 32.00it/s]global step 130000, trans_decision ep_re 177.56124707775797

{"global_step": 130000, "eval_re": [333.32291078088946, 16.28463237117046, 
239.1479443545109, 312.50225770943734, 17.51421436335758, 211.44716704551374, 
16.538703410469704, 324.52533647642866, 220.9926438883703, 83.33666037743151], 
"eval_len": [163, 25, 123, 149, 25, 224, 23, 227, 181, 75]}

 14%|█▍        | 139998/1000000 [1:31:20<7:22:29, 32.39it/s]global step 140000, trans_decision ep_re 325.50588201826594

{"global_step": 140000, "eval_re": [18.26858505042553, 15.52641979591468, 
14.620019194865792, 1149.32325267739, 406.10077174753036, 467.75173925471654, 
14.204642439629273, 526.5297295542688, 440.9832373611641, 201.7504231067545], 
"eval_len": [24, 25, 22, 397, 170, 202, 22, 217, 213, 140]}

 15%|█▍        | 149997/1000000 [1:38:06<7:15:25, 32.53it/s]global step 150000, trans_decision ep_re 50.78654877841704

{"global_step": 150000, "eval_re": [246.5167722033622, 15.75087789925268, 
17.660020068072296, 14.715513208459422, 13.84558721155952, 18.00349683934681, 
14.613445858808621, 21.78665647518782, 16.88110423160601, 128.0920137885151], 
"eval_len": [167, 24, 25, 24, 23, 24, 23, 26, 24, 94]}

 16%|█▌        | 159997/1000000 [1:44:51<7:20:03, 31.81it/s]global step 160000, trans_decision ep_re 264.24887256346597

{"global_step": 160000, "eval_re": [14.843274728813393, 593.312079095823, 
12.127518533079632, 178.97496440938846, 13.395086610335184, 16.070636823143175, 
1227.2459875519457, 553.6430542277207, 17.30035958282629, 15.575764071583858], 
"eval_len": [24, 236, 21, 125, 23, 25, 406, 217, 26, 25]}

 17%|█▋        | 169999/1000000 [1:51:38<7:12:43, 31.97it/s]global step 170000, trans_decision ep_re 96.78872953262095

{"global_step": 170000, "eval_re": [18.39113864320596, 649.8366000796143, 
11.59306767897511, 13.835789130400666, 18.29793817928256, 197.7906822032313, 
12.087470902309487, 16.474378718153307, 14.759000428106955, 14.821229362929762],
"eval_len": [24, 242, 21, 24, 25, 115, 21, 24, 24, 23]}

 18%|█▊        | 179999/1000000 [1:58:23<7:11:34, 31.67it/s]global step 180000, trans_decision ep_re 58.26466662089465

{"global_step": 180000, "eval_re": [17.214298790236402, 17.913248993701966, 
14.1422452216263, 175.75034885591293, 15.140526245932438, 14.578298742153088, 
18.188861439275406, 20.25569032990752, 15.386681529836503, 274.076466060364], 
"eval_len": [26, 27, 22, 109, 24, 23, 24, 27, 23, 157]}

 19%|█▉        | 189999/1000000 [2:05:20<7:03:28, 31.88it/s]global step 190000, trans_decision ep_re 381.3926642203729

{"global_step": 190000, "eval_re": [14.34783744557568, 1094.1709358900944, 
16.26754586547038, 14.25884673512471, 434.4952309377508, 15.960829164074605, 
16.400398130062605, 1350.8300066280856, 16.86312228775219, 840.3318891197391], 
"eval_len": [24, 347, 24, 23, 194, 25, 25, 461, 24, 326]}

 20%|█▉        | 199997/1000000 [2:12:10<6:57:58, 31.90it/s]global step 200000, trans_decision ep_re 372.80258316375637

{"global_step": 200000, "eval_re": [1185.1784744404752, 16.702050540341997, 
15.242937294187362, 428.26580336634623, 17.45247178831964, 1016.7442990815368, 
16.134323966857778, 995.5878676207889, 20.701161184513754, 16.01644235419665], 
"eval_len": [366, 24, 22, 172, 24, 311, 24, 311, 27, 25]}

 21%|██        | 209997/1000000 [2:19:00<6:49:26, 32.16it/s]global step 210000, trans_decision ep_re 756.0269452529413

{"global_step": 210000, "eval_re": [2262.096326456658, 15.22960184977796, 
14.537529634400439, 16.161642679497827, 14.534737697980415, 1594.7823037866904, 
16.813279009860153, 1774.9271779730896, 17.643211728778425, 1833.5436417126805],
"eval_len": [648, 25, 23, 23, 24, 476, 24, 498, 26, 488]}

 22%|██▏       | 219997/1000000 [2:25:50<6:50:44, 31.65it/s]global step 220000, trans_decision ep_re 209.7355411649615

{"global_step": 220000, "eval_re": [16.705242878155993, 15.802121176717707, 
350.9206085868066, 15.648493204352054, 1625.5323771590704, 13.655800411231697, 
13.16434653476568, 13.77216829607307, 15.667189082419416, 16.48706432002213], 
"eval_len": [26, 23, 170, 25, 439, 22, 22, 22, 25, 26]}

 23%|██▎       | 229997/1000000 [2:32:25<6:39:43, 32.11it/s]global step 230000, trans_decision ep_re 33.45505000237107

{"global_step": 230000, "eval_re": [19.019307346488887, 17.83426059857529, 
14.184080646677334, 15.399608252269532, 16.36577900023608, 13.937070446961812, 
175.34109850910096, 15.812222196814632, 14.037784813680839, 32.619288212905325],
"eval_len": [24, 27, 22, 24, 23, 22, 79, 25, 23, 40]}

 24%|██▍       | 239997/1000000 [2:39:10<6:33:35, 32.18it/s]global step 240000, trans_decision ep_re 413.5586559710232

{"global_step": 240000, "eval_re": [1599.5174752003127, 768.9649602013086, 
16.742238011148075, 15.1667027664593, 16.655486128904144, 15.159713388281947, 
12.658929223706117, 17.12458416540222, 1656.5916006676869, 17.004869957021583], 
"eval_len": [424, 243, 26, 24, 27, 23, 22, 24, 428, 25]}

 25%|██▍       | 249999/1000000 [2:45:58<6:27:24, 32.27it/s]global step 250000, trans_decision ep_re 86.17885734602636

{"global_step": 250000, "eval_re": [15.881651644253408, 13.029553062268846, 
17.256691272590814, 16.688753865095727, 15.250598497272563, 12.821142367521844, 
14.963557861997575, 14.426332102559412, 725.7770308626086, 15.693261924094859], 
"eval_len": [24, 22, 27, 25, 24, 22, 24, 24, 238, 26]}

 26%|██▌       | 259999/1000000 [2:52:43<6:25:45, 31.97it/s]global step 260000, trans_decision ep_re 255.45120176589398

{"global_step": 260000, "eval_re": [16.752884429501254, 16.767100640863568, 
1297.8831219534854, 17.09963526783943, 15.094737496439906, 16.06470547548324, 
15.56537281683053, 1126.821228626343, 16.463848786829978, 15.999382165322958], 
"eval_len": [27, 26, 488, 25, 25, 25, 26, 326, 25, 26]}

 27%|██▋       | 269998/1000000 [2:59:30<6:23:51, 31.70it/s]global step 270000, trans_decision ep_re 495.48739363668375

{"global_step": 270000, "eval_re": [15.076102703685676, 187.0445891244914, 
164.65915702665635, 13.471259426254163, 15.830596742319035, 17.457807713110356, 
14.763451551771999, 4499.169561859914, 13.922891812532098, 13.478518406102346], 
"eval_len": [24, 102, 116, 26, 26, 26, 22, 1000, 23, 23]}

 28%|██▊       | 279997/1000000 [3:06:30<6:14:24, 32.05it/s]global step 280000, trans_decision ep_re 467.89756829256237

{"global_step": 280000, "eval_re": [13.656620546050906, 15.862728937400222, 
15.086078461496692, 13.480013881005073, 13.87776432064354, 15.226647421572146, 
16.98418748900672, 13.746557766812286, 14.36749476474082, 4546.687589336895], 
"eval_len": [22, 26, 25, 23, 22, 23, 24, 22, 23, 1000]}

 29%|██▉       | 289997/1000000 [3:13:20<6:08:23, 32.12it/s]global step 290000, trans_decision ep_re 768.5783291318893

{"global_step": 290000, "eval_re": [13.652644758605703, 2041.8583232331996, 
1253.390755083249, 873.5991333932793, 1617.063871572042, 15.659413953165437, 
1829.0369671419846, 16.53485602881069, 12.678855752229428, 12.308470402326316], 
"eval_len": [22, 476, 349, 276, 423, 25, 449, 25, 21, 21]}

 30%|██▉       | 299997/1000000 [3:20:10<6:02:50, 32.15it/s]global step 300000, trans_decision ep_re 195.26801743401663

{"global_step": 300000, "eval_re": [12.588849272798715, 1487.8885252600387, 
74.46108896884336, 11.72237426624797, 15.117806520456403, 85.13658810632266, 
218.98702067702058, 14.324890645691443, 17.029296957365645, 15.423733665380684],
"eval_len": [22, 406, 59, 21, 24, 63, 122, 23, 24, 25]}

 31%|███       | 309997/1000000 [3:26:46<6:02:34, 31.72it/s]global step 310000, trans_decision ep_re 212.37769539833417

{"global_step": 310000, "eval_re": [1990.1207373590246, 18.216391355071888, 
13.134133514203377, 15.70636013838491, 13.412950863736317, 16.036417411503276, 
15.19610258289357, 15.116463584127988, 13.255105909729156, 13.582291264666903], 
"eval_len": [516, 26, 23, 25, 23, 23, 26, 24, 22, 23]}

 32%|███▏      | 319999/1000000 [3:33:34<5:56:31, 31.79it/s]global step 320000, trans_decision ep_re 582.8728235993511

{"global_step": 320000, "eval_re": [2152.4194604298286, 11.965805530112252, 
14.316840459880256, 15.058981482990745, 186.47146210108923, 171.06194790762157, 
12.666549398753528, 3237.6246574300894, 13.287827988691074, 13.8547032644549], 
"eval_len": [547, 23, 24, 23, 110, 111, 23, 699, 23, 24]}

 33%|███▎      | 329998/1000000 [3:40:24<5:46:09, 32.26it/s]global step 330000, trans_decision ep_re 558.9025649960961

{"global_step": 330000, "eval_re": [1213.755887126618, 151.39956073494452, 
16.853803697116724, 13.112856303944644, 13.328100721664583, 173.80389504191228, 
13.466144293037148, 15.347363679287637, 2610.4423638035005, 1367.515674558935], 
"eval_len": [355, 90, 24, 23, 22, 95, 23, 25, 600, 369]}

 34%|███▍      | 339999/1000000 [3:47:14<5:41:45, 32.19it/s]global step 340000, trans_decision ep_re 192.37489586995773

{"global_step": 340000, "eval_re": [15.955714684587752, 12.75412437059518, 
621.5577460538241, 14.692402297217829, 14.44359001954083, 13.269039799318994, 
12.703276275038235, 1189.4332554128555, 13.641299286395132, 15.298510500203648],
"eval_len": [25, 22, 230, 23, 22, 22, 23, 340, 23, 22]}

 35%|███▍      | 349998/1000000 [3:54:10<5:37:47, 32.07it/s]global step 350000, trans_decision ep_re 507.4478637818671

{"global_step": 350000, "eval_re": [11.625091046746524, 15.155547887610814, 
1866.6536340893551, 14.215029242684551, 13.552362287080815, 13.532449242050546, 
15.996395030462068, 16.12837724357374, 13.840738329668822, 3093.779013419438], 
"eval_len": [20, 23, 474, 23, 24, 23, 24, 24, 23, 719]}

 36%|███▌      | 359997/1000000 [4:01:00<5:34:29, 31.89it/s]global step 360000, trans_decision ep_re 408.59477969708524

{"global_step": 360000, "eval_re": [96.63117634310325, 17.24263584478994, 
14.111054007953857, 1630.6068993955469, 15.23452627637155, 1399.3214194487662, 
13.123432293990724, 15.997648084129192, 14.753392649655384, 868.9256126265457], 
"eval_len": [61, 25, 25, 388, 25, 378, 22, 25, 23, 283]}

 37%|███▋      | 369997/1000000 [4:07:36<5:30:06, 31.81it/s]global step 370000, trans_decision ep_re 375.87727546338596

{"global_step": 370000, "eval_re": [14.906711272028215, 13.98147233875245, 
15.370217276332204, 3246.8076448072775, 164.34138585120314, 13.153026262187621, 
14.48993810902854, 248.38924533253947, 13.737927489762171, 13.595185894748127], 
"eval_len": [23, 24, 24, 720, 107, 23, 23, 122, 22, 24]}

 38%|███▊      | 379999/1000000 [4:14:40<5:26:34, 31.64it/s]global step 380000, trans_decision ep_re 859.3148438720239

{"global_step": 380000, "eval_re": [15.712965898696183, 743.1932588308966, 
1162.5578909753895, 1321.7837378655736, 1401.8055888189572, 986.7599827160778, 
13.930104798416599, 209.05234943555558, 1073.7616937180558, 1664.5908656626193],
"eval_len": [24, 241, 342, 348, 356, 302, 23, 110, 317, 409]}

 39%|███▉      | 389997/1000000 [4:21:18<5:16:40, 32.10it/s]global step 390000, trans_decision ep_re 137.2362641310966

{"global_step": 390000, "eval_re": [24.193384679988583, 10.841113821334538, 
13.531140363706863, 25.84407709959786, 10.426816838450247, 49.183011928719054, 
27.78841077552847, 13.457780438634313, 37.65457595895885, 1159.442329406047], 
"eval_len": [39, 22, 24, 40, 22, 45, 41, 25, 45, 353]}

 40%|███▉      | 399999/1000000 [4:28:05<5:11:25, 32.11it/s]global step 400000, trans_decision ep_re 201.75186317359996

{"global_step": 400000, "eval_re": [13.629138195134782, 13.484406626229138, 
13.739339619411526, 13.396158436661915, 1248.4473047792296, 15.152277823640855, 
14.062494053983714, 148.42178113923157, 523.7366464841323, 13.449084578344136], 
"eval_len": [25, 23, 22, 24, 337, 24, 23, 101, 208, 24]}

 41%|████      | 409998/1000000 [4:35:10<5:09:47, 31.74it/s]global step 410000, trans_decision ep_re 1146.5886039363963

{"global_step": 410000, "eval_re": [1172.1503032075616, 2159.501010819946, 
3074.138161056051, 11.83221289814796, 12.743619522245227, 13.219647386234259, 
1737.8458401337386, 13.179711014909685, 3044.5975599966264, 226.67797332850444],
"eval_len": [341, 502, 656, 22, 22, 22, 395, 24, 650, 126]}

 42%|████▏     | 419997/1000000 [4:42:00<5:00:55, 32.12it/s]global step 420000, trans_decision ep_re 764.5143655730492

{"global_step": 420000, "eval_re": [1176.1098157665774, 3742.839763834493, 
15.66335656322897, 920.9867533269436, 17.159637504262747, 1384.012807711796, 
142.76316977392403, 17.75416929028822, 16.130006547962534, 211.7241754110155], 
"eval_len": [332, 849, 24, 271, 27, 362, 90, 26, 26, 122]}

 43%|████▎     | 429997/1000000 [4:48:50<4:53:31, 32.37it/s]global step 430000, trans_decision ep_re 788.787791785536

{"global_step": 430000, "eval_re": [11.560928144111864, 186.54752381915617, 
801.3544061179217, 1325.159632526693, 2304.8973054237526, 1309.7197788903975, 
11.859623328479534, 13.404458837242952, 1911.0541226919363, 12.320138075669382],
"eval_len": [24, 99, 237, 345, 574, 413, 24, 24, 459, 22]}

 44%|████▍     | 439997/1000000 [4:55:22<4:49:12, 32.27it/s]global step 440000, trans_decision ep_re 342.6837829797716

{"global_step": 440000, "eval_re": [14.992448210898829, 17.67497048127687, 
18.629125166437476, 15.699360653096136, 143.92460080538407, 473.0935030072118, 
2691.307710753099, 17.354399732989076, 15.798808721132211, 18.36290226619103], 
"eval_len": [23, 25, 26, 25, 86, 181, 631, 28, 24, 25]}

 45%|████▍     | 449999/1000000 [5:02:07<4:44:31, 32.22it/s]global step 450000, trans_decision ep_re 195.0544837129621

{"global_step": 450000, "eval_re": [19.02417578579906, 18.555460944075854, 
16.783927509463005, 832.6006999492961, 19.63698452290395, 974.6118848667109, 
11.653259138293285, 20.185992258178494, 17.547556941792003, 19.944895213108648],
"eval_len": [27, 25, 24, 276, 30, 280, 21, 29, 27, 29]}

 46%|████▌     | 459998/1000000 [5:08:51<4:43:21, 31.76it/s]global step 460000, trans_decision ep_re 591.9085114748053

{"global_step": 460000, "eval_re": [162.43731920551102, 987.0930517634191, 
658.0768629201506, 1545.3844078007112, 217.15440067840456, 162.4597997462118, 
45.94551151080761, 1205.0690178549335, 181.84345186156858, 753.6212914063359], 
"eval_len": [83, 285, 229, 377, 107, 109, 186, 325, 88, 251]}

 47%|████▋     | 469999/1000000 [5:15:50<4:36:16, 31.97it/s]global step 470000, trans_decision ep_re 383.675988979856

{"global_step": 470000, "eval_re": [16.55617139839346, 145.01671591509862, 
14.467719174325113, 2406.8495057857263, 15.429326280088018, 16.246838920678396, 
12.520709204275512, 1181.1826213177094, 15.237705857686315, 13.252575944579068],
"eval_len": [26, 89, 23, 555, 24, 25, 22, 353, 24, 22]}

 48%|████▊     | 479997/1000000 [5:22:40<4:28:15, 32.31it/s]global step 480000, trans_decision ep_re 853.5322458931257

{"global_step": 480000, "eval_re": [220.08459839210383, 3291.295722461351, 
26.239358107737694, 140.0322419481039, 288.95967817905046, 22.03971147764294, 
21.65841491253455, 28.746075303893377, 4470.694874982544, 25.571783166294995], 
"eval_len": [143, 728, 31, 110, 166, 29, 27, 33, 1000, 32]}

 49%|████▉     | 489997/1000000 [5:29:13<4:23:17, 32.28it/s]global step 490000, trans_decision ep_re 623.8477682160171

{"global_step": 490000, "eval_re": [642.0769385491542, 219.60639808020994, 
1317.0952779224783, 9.742756519230575, 1794.28472255762, 184.79893725299868, 
257.51057498131144, 1468.2314720481986, 17.08901606767461, 328.04158818129486], 
"eval_len": [221, 127, 347, 19, 446, 123, 133, 417, 25, 160]}

 50%|████▉     | 499999/1000000 [5:36:02<4:19:43, 32.09it/s]global step 500000, trans_decision ep_re 574.3025999010216

{"global_step": 500000, "eval_re": [13.469032656385496, 15.368089877367456, 
3793.1311863774345, 18.234037484081522, 16.840452119589298, 161.61260469019334, 
153.44240995986536, 16.480862528152688, 11.998838439350116, 1542.4484848777968],
"eval_len": [22, 23, 748, 25, 24, 104, 96, 26, 22, 412]}

 51%|█████     | 509997/1000000 [5:42:50<4:13:00, 32.28it/s]global step 510000, trans_decision ep_re 555.0461093405129

{"global_step": 510000, "eval_re": [20.948257249841397, 3893.4047046692613, 
148.17602944970756, 1239.091171241875, 19.34335788493776, 16.681995090831933, 
19.55345864195597, 18.55169247563999, 18.458775765414135, 156.2516509356633], 
"eval_len": [28, 761, 64, 329, 28, 24, 27, 26, 24, 71]}

 52%|█████▏    | 519999/1000000 [5:49:50<4:12:01, 31.74it/s]global step 520000, trans_decision ep_re 950.8459997880475

{"global_step": 520000, "eval_re": [12.376727782726887, 5066.302337629668, 
13.130742695682137, 12.554742230630023, 13.60026511139282, 13.613145854571885, 
14.655147012330591, 4334.935356883059, 13.017978265641569, 14.273554414771764], 
"eval_len": [23, 1000, 23, 24, 25, 24, 25, 893, 24, 24]}

 53%|█████▎    | 529997/1000000 [5:56:40<4:04:12, 32.08it/s]global step 530000, trans_decision ep_re 486.0151179251081

{"global_step": 530000, "eval_re": [13.488805868226256, 14.489208211853152, 
14.661015773806342, 13.817166297295731, 12.747095615256043, 13.864912702986134, 
1504.268627113596, 3241.1177108256907, 14.795528899659688, 16.901107942710073], 
"eval_len": [25, 25, 26, 24, 24, 23, 375, 658, 24, 23]}

 54%|█████▍    | 539997/1000000 [6:03:30<3:57:59, 32.21it/s]global step 540000, trans_decision ep_re 468.01914358115135

{"global_step": 540000, "eval_re": [15.098003952487579, 15.042383401335021, 
17.67692480153601, 2113.599608200673, 14.732894108612095, 15.012852052447755, 
15.111946255854727, 1256.6750134867052, 12.60075634877962, 1204.6410532030823], 
"eval_len": [26, 24, 26, 466, 26, 25, 26, 368, 21, 339]}

 55%|█████▍    | 549997/1000000 [6:10:20<3:53:33, 32.11it/s]global step 550000, trans_decision ep_re 593.4860765266847

{"global_step": 550000, "eval_re": [15.143219566939214, 13.26980106318316, 
14.366690663943142, 15.554338887391832, 3276.664657069412, 15.132865334270162, 
16.30915997580102, 2538.511416245711, 13.188081495748989, 16.720534964445257], 
"eval_len": [26, 21, 24, 25, 705, 23, 24, 586, 22, 26]}

 56%|█████▌    | 559997/1000000 [6:17:10<3:47:25, 32.25it/s]global step 560000, trans_decision ep_re 683.0214038199013

{"global_step": 560000, "eval_re": [14.746958084863909, 210.37011971464412, 
15.729841384342937, 187.03077528597018, 14.809199427740788, 14.502728550946818, 
1910.9999780418732, 1880.8551010496708, 1169.0565508345671, 1412.1127858243917],
"eval_len": [23, 110, 25, 114, 25, 23, 437, 457, 328, 367]}

 57%|█████▋    | 569997/1000000 [6:24:00<3:44:04, 31.98it/s]global step 570000, trans_decision ep_re 322.8655235893069

{"global_step": 570000, "eval_re": [13.247608417657226, 15.940117587982632, 
309.1217579135162, 15.767667860168881, 13.98426913442973, 190.12298211384424, 
753.524620829724, 14.775623330676192, 1889.3382179417163, 12.832370763353522], 
"eval_len": [24, 24, 147, 25, 23, 123, 284, 25, 439, 22]}

 58%|█████▊    | 579997/1000000 [6:30:36<3:38:43, 32.00it/s]global step 580000, trans_decision ep_re 15.123345509857586

{"global_step": 580000, "eval_re": [15.666219881182656, 14.952219165023182, 
15.33926401430545, 14.543187050688873, 15.442993440680013, 14.999604968274625, 
15.054806897227243, 13.699546872924895, 16.233108637463477, 15.302504170805445],
"eval_len": [24, 24, 24, 24, 24, 24, 25, 23, 26, 24]}

 59%|█████▉    | 589997/1000000 [6:37:21<3:32:49, 32.11it/s]global step 590000, trans_decision ep_re 714.1649046098162

{"global_step": 590000, "eval_re": [4510.770138593511, 11.497987235199677, 
14.121320272756966, 13.873793832061626, 9.220100512373033, 13.534516146105766, 
672.0998931211384, 12.170139691599509, 207.18155251533824, 1677.1796041780779], 
"eval_len": [908, 23, 24, 24, 21, 26, 229, 22, 110, 381]}

 60%|█████▉    | 599999/1000000 [6:44:12<3:26:58, 32.21it/s]global step 600000, trans_decision ep_re 828.5422444504271

{"global_step": 600000, "eval_re": [1127.015899656629, 2127.5920111235378, 
13.02344268233293, 2430.5929443758646, 1107.486958746001, 11.347637614343427, 
354.31501905455144, 970.5470793755501, 10.786945122669588, 132.71450675279104], 
"eval_len": [305, 545, 25, 529, 288, 22, 171, 295, 22, 83]}

 61%|██████    | 609997/1000000 [6:51:03<3:23:31, 31.94it/s]global step 610000, trans_decision ep_re 15.193469526749874

{"global_step": 610000, "eval_re": [17.233039081897775, 14.849767830108847, 
15.0045845579386, 13.676227572561665, 17.214656998534934, 16.75295091442505, 
13.065433669115636, 14.807509933590662, 15.280720971135924, 14.049803738189642],
"eval_len": [26, 26, 23, 22, 25, 25, 22, 24, 24, 22]}

 62%|██████▏   | 619997/1000000 [6:57:48<3:20:32, 31.58it/s]global step 620000, trans_decision ep_re 176.12349384199194

{"global_step": 620000, "eval_re": [15.501082013539076, 13.428463400686887, 
13.306408066072892, 354.8059824680091, 17.34823715102705, 11.438039905411648, 
17.510157426654587, 16.431602853930553, 16.444652849163205, 1285.0203122854243],
"eval_len": [24, 22, 22, 167, 24, 21, 27, 25, 24, 349]}

 63%|██████▎   | 629999/1000000 [7:04:35<3:16:15, 31.42it/s]global step 630000, trans_decision ep_re 377.71873091071365

{"global_step": 630000, "eval_re": [315.4378793513261, 16.011022396847416, 
16.11954229644268, 14.75337127775954, 14.355338244237295, 18.082316844276072, 
13.740793190873243, 13.23629947789391, 801.7285790605131, 2553.722166966967], 
"eval_len": [143, 26, 24, 24, 25, 26, 22, 22, 237, 632]}

 64%|██████▍   | 639997/1000000 [7:11:23<3:05:38, 32.32it/s]global step 640000, trans_decision ep_re 518.6173966851244

{"global_step": 640000, "eval_re": [14.084415503362088, 15.336921131083653, 
164.197011805693, 14.745845527852305, 16.10644614197434, 14.680135571754363, 
1779.0093199248217, 1914.2399664758104, 1240.2495898045936, 13.524314964297929],
"eval_len": [24, 25, 100, 23, 26, 25, 435, 440, 345, 23]}

 65%|██████▍   | 649999/1000000 [7:18:30<3:02:16, 32.00it/s]global step 650000, trans_decision ep_re 1625.248021227227

{"global_step": 650000, "eval_re": [3663.54325687471, 16.008817233086397, 
15.466120612158544, 4880.15277352332, 15.962057082468425, 736.3401331017506, 
2848.2262588225576, 14.50302519139733, 162.00264272333618, 3900.275127107484], 
"eval_len": [763, 25, 24, 976, 24, 223, 613, 23, 101, 752]}

 66%|██████▌   | 659997/1000000 [7:25:20<2:55:40, 32.26it/s]global step 660000, trans_decision ep_re 1461.9387123438903

{"global_step": 660000, "eval_re": [4526.230691100094, 11.19746557240465, 
748.7930249731058, 966.6347227848889, 13.779392562845166, 1813.1528566387228, 
2020.319671823932, 2454.4865155095163, 1678.3493841223406, 386.443398351053], 
"eval_len": [971, 21, 254, 291, 28, 443, 469, 587, 432, 152]}

 67%|██████▋   | 669997/1000000 [7:32:10<2:51:30, 32.07it/s]global step 670000, trans_decision ep_re 1153.7315061139725

{"global_step": 670000, "eval_re": [539.1592830397683, 11.470072038532017, 
2802.736187258037, 5053.9478233173695, 15.702974987356725, 3049.3295074511916, 
15.069541391854107, 16.62373653018299, 18.33552504772094, 14.94041007771118], 
"eval_len": [202, 20, 659, 1000, 24, 626, 24, 23, 26, 26]}

 68%|██████▊   | 679997/1000000 [7:39:00<2:46:08, 32.10it/s]global step 680000, trans_decision ep_re 982.0120383042049

{"global_step": 680000, "eval_re": [2088.5279947666604, 199.7777603328465, 
2391.8515252506736, 12.504896438745185, 426.3233771597883, 14.015228488382473, 
11.972599993188657, 13.7320000402361, 12.094575999278208, 4649.320424572249], 
"eval_len": [465, 114, 524, 23, 218, 24, 23, 24, 23, 914]}

 69%|██████▉   | 689997/1000000 [7:45:50<2:39:29, 32.39it/s]global step 690000, trans_decision ep_re 1441.2241354105915

{"global_step": 690000, "eval_re": [2395.383716252784, 2213.1746938740184, 
2799.605351967796, 12.341352470153085, 1837.8919080319251, 11.475125474858116, 
2403.8914270890104, 2716.5126195685593, 10.940616420912963, 11.024542955897504],
"eval_len": [543, 505, 588, 23, 428, 22, 528, 567, 22, 23]}

 70%|██████▉   | 699997/1000000 [7:52:40<2:32:13, 32.85it/s]global step 700000, trans_decision ep_re 1282.138264526052

{"global_step": 700000, "eval_re": [2851.6799400464724, 151.10011666705242, 
3322.843357902029, 147.77679021062522, 1811.6739773737106, 159.31065346462168, 
150.82028950774276, 143.0636580299826, 4072.761184901845, 10.35267715643802], 
"eval_len": [656, 93, 698, 94, 402, 98, 96, 89, 853, 23]}

 71%|███████   | 709997/1000000 [7:59:30<2:28:34, 32.53it/s]global step 710000, trans_decision ep_re 1637.8761814904888

{"global_step": 710000, "eval_re": [4750.135816967171, 10.894779833661229, 
1011.7899167136372, 2187.28322756836, 12.644017549685481, 231.04287059961928, 
3097.5340207374766, 220.5981540212809, 1528.7093527045236, 3328.1296582094733], 
"eval_len": [1000, 22, 327, 469, 22, 129, 682, 123, 391, 674]}

 72%|███████▏  | 719997/1000000 [8:06:04<2:22:23, 32.77it/s]global step 720000, trans_decision ep_re 773.8297384269582

{"global_step": 720000, "eval_re": [408.8136630576477, 14.239782190216017, 
243.05580728708156, 169.67290334881216, 154.55629566607564, 2553.043345716202, 
2190.194607291312, 160.28881931455896, 1831.6231023400658, 12.809058057609382], 
"eval_len": [171, 25, 125, 92, 108, 540, 492, 88, 438, 23]}

 73%|███████▎  | 729999/1000000 [8:12:52<2:18:19, 32.53it/s]global step 730000, trans_decision ep_re 560.2940383580315

{"global_step": 730000, "eval_re": [14.290301511668297, 14.756762191266203, 
16.450275565500466, 15.445273539211932, 2765.24978824462, 2718.3194292840603, 
13.60285032104753, 13.641695999056864, 16.346144745972456, 14.837862177910573], 
"eval_len": [24, 24, 24, 25, 586, 560, 22, 22, 24, 23]}

 74%|███████▍  | 739998/1000000 [8:19:50<2:15:46, 31.92it/s]global step 740000, trans_decision ep_re 1351.4154069669073

{"global_step": 740000, "eval_re": [12.096951443605871, 2653.173099599465, 
801.0567501309287, 2950.436795219091, 1604.878070679243, 758.929112883265, 
13.514749274603426, 13.038680863788471, 4523.397094969084, 183.6327646059994], 
"eval_len": [22, 556, 297, 634, 409, 249, 22, 24, 1000, 126]}

 75%|███████▍  | 749997/1000000 [8:26:40<2:14:14, 31.04it/s]global step 750000, trans_decision ep_re 1027.6764526547875

{"global_step": 750000, "eval_re": [430.1632285710191, 5235.835248164962, 
14.855571886051573, 16.865788675984902, 17.537395913887895, 14.662565814909026, 
13.058561918278924, 14.66957452229382, 13.212214061093638, 4505.904377019393], 
"eval_len": [169, 1000, 22, 25, 24, 22, 23, 23, 22, 969]}

 76%|███████▌  | 759997/1000000 [8:33:14<2:03:00, 32.52it/s]global step 760000, trans_decision ep_re 551.01389256205

{"global_step": 760000, "eval_re": [162.54604514447934, 424.2362199868905, 
147.05301231899531, 234.2596314785504, 305.534561355647, 916.0524029870229, 
433.31104387791316, 1805.367276231335, 12.31451900287349, 1069.464213236794], 
"eval_len": [100, 161, 85, 124, 141, 262, 165, 422, 23, 353]}

 77%|███████▋  | 769999/1000000 [8:40:10<1:57:41, 32.57it/s]global step 770000, trans_decision ep_re 800.5558913698642

{"global_step": 770000, "eval_re": [171.6918318535975, 12.175613451204649, 
12.765101432338097, 11.597258239746216, 1912.1202273317604, 181.99816005268303, 
633.7230384974464, 4422.919095111713, 12.935865589789959, 633.6327221383629], 
"eval_len": [97, 21, 22, 22, 451, 99, 230, 1000, 23, 210]}

 78%|███████▊  | 779997/1000000 [8:46:47<1:54:25, 32.05it/s]global step 780000, trans_decision ep_re 542.2874148893021

{"global_step": 780000, "eval_re": [14.646593079413801, 12.783790529390867, 
12.171075538498272, 14.93456938162326, 12.278080596008083, 5019.548029489419, 
13.588941528567188, 294.91660704235915, 12.790584969072638, 15.21587673866875], 
"eval_len": [24, 23, 21, 24, 21, 1000, 23, 152, 24, 23]}

 79%|███████▉  | 789999/1000000 [8:53:32<1:47:01, 32.70it/s]global step 790000, trans_decision ep_re 874.8248521818441

{"global_step": 790000, "eval_re": [219.27524863394115, 13.308197438184507, 
4398.038318149684, 133.99449031847996, 1307.0785003378328, 158.0952800522219, 
130.02696062803605, 1405.7294035887994, 833.2631377812451, 149.43898489001685], 
"eval_len": [103, 24, 831, 91, 331, 98, 91, 344, 243, 84]}

 80%|███████▉  | 799998/1000000 [9:00:30<1:43:49, 32.10it/s]global step 800000, trans_decision ep_re 1870.9563365589227

{"global_step": 800000, "eval_re": [3021.267503605089, 3983.65925145609, 
140.16878349499444, 2259.647997542304, 130.39067583497115, 11.851786456246709, 
13.698852902202235, 1010.9782162920769, 3077.0945296525474, 5060.805768352706], 
"eval_len": [603, 781, 87, 509, 81, 22, 23, 273, 652, 975]}

 81%|████████  | 809997/1000000 [9:07:12<1:37:14, 32.57it/s]global step 810000, trans_decision ep_re 641.1050740177968

{"global_step": 810000, "eval_re": [11.55753492583654, 11.457540526513696, 
819.302815689772, 12.61787685259519, 12.730246268930753, 14.685704377669802, 
13.545018154561642, 13.523983818570775, 5487.950019385584, 13.680000177934227], 
"eval_len": [22, 24, 244, 22, 23, 22, 23, 23, 1000, 22]}

 82%|████████▏ | 819999/1000000 [9:14:10<1:33:34, 32.06it/s]global step 820000, trans_decision ep_re 1939.5940769938939

{"global_step": 820000, "eval_re": [2060.1041867518484, 10.48011789232436, 
3857.4149750512165, 12.86851399230596, 4430.746772408266, 2165.454913171154, 
3634.6496089629236, 12.025711056504711, 11.08670825883881, 3201.1092623935533], 
"eval_len": [475, 22, 745, 22, 859, 473, 713, 22, 22, 616]}

 83%|████████▎ | 829997/1000000 [9:21:00<1:27:07, 32.52it/s]global step 830000, trans_decision ep_re 637.3277240695226

{"global_step": 830000, "eval_re": [12.20895304968139, 11.440304276129508, 
229.9706400542338, 11.28040890385113, 2886.6391384968492, 476.1573343515267, 
11.595141365110653, 1818.8144012683447, 734.0009803175678, 181.16993861193077], 
"eval_len": [22, 23, 137, 21, 694, 199, 22, 425, 231, 110]}

 84%|████████▍ | 839997/1000000 [9:27:50<1:23:12, 32.05it/s]global step 840000, trans_decision ep_re 1658.3480570834843

{"global_step": 840000, "eval_re": [12.003343067632235, 5372.425696773863, 
319.90679484497565, 10.356716780137843, 389.28738591165586, 11.21259735868871, 
4964.049583396003, 5234.279986351413, 12.43758920312146, 257.5208771473487], 
"eval_len": [22, 1000, 143, 22, 177, 23, 1000, 992, 23, 127]}

 85%|████████▍ | 849997/1000000 [9:34:40<1:17:24, 32.30it/s]global step 850000, trans_decision ep_re 1078.8223846882759

{"global_step": 850000, "eval_re": [4862.296462509798, 792.9340100045548, 
2473.612731210794, 1262.026009363006, 341.82442996271243, 134.02053909974714, 
521.2373812847868, 12.805891932824077, 192.07026711606497, 195.39612439846942], 
"eval_len": [1000, 253, 638, 339, 158, 79, 200, 22, 110, 112]}

 86%|████████▌ | 859997/1000000 [9:41:15<1:12:10, 32.33it/s]global step 860000, trans_decision ep_re 591.8760667707343

{"global_step": 860000, "eval_re": [184.3249293821079, 139.82408949437854, 
147.17759917031543, 12.01078853692348, 15.497907792745416, 2631.378871283227, 
13.13895879461101, 2750.6193232470596, 12.491305022785948, 12.296894983188634], 
"eval_len": [99, 84, 85, 21, 23, 560, 22, 566, 22, 24]}

 87%|████████▋ | 869999/1000000 [9:48:10<1:06:19, 32.67it/s]global step 870000, trans_decision ep_re 1518.3319616019578

{"global_step": 870000, "eval_re": [382.54546370060274, 755.5552814654658, 
2428.7620743547504, 993.8052075368449, 3135.3391686580762, 210.96380696604606, 
174.61939061728572, 399.86443049113774, 4552.744682760827, 2149.1201094685416], 
"eval_len": [160, 274, 558, 305, 789, 150, 198, 213, 1000, 523]}

 88%|████████▊ | 879997/1000000 [9:54:52<1:01:44, 32.40it/s]global step 880000, trans_decision ep_re 615.9995757087108

{"global_step": 880000, "eval_re": [10.731681353932684, 180.41157450239, 
897.7520380733581, 2529.735719514575, 14.645353922383297, 2296.773564783162, 
14.871690568479016, 12.208071746159547, 11.458156685697741, 191.40790593697056],
"eval_len": [22, 168, 256, 584, 24, 494, 24, 24, 23, 117]}

 89%|████████▉ | 889999/1000000 [10:01:37<56:51, 32.25it/s]global step 890000, trans_decision ep_re 34.964821503362984

{"global_step": 890000, "eval_re": [17.19346025721885, 23.588592997680596, 
43.39587918699623, 71.89051506713473, 31.499085766771287, 36.788877378425475, 
37.915698333168095, 39.64525842039531, 14.50082546858879, 33.230022157250474], 
"eval_len": [28, 135, 71, 146, 120, 143, 165, 98, 120, 142]}

 90%|████████▉ | 899999/1000000 [10:08:22<51:50, 32.15it/s]global step 900000, trans_decision ep_re 331.27154129806325

{"global_step": 900000, "eval_re": [11.091052742248664, 11.22038899357268, 
520.7557559001117, 370.94231514314157, 12.603403886181415, 13.41856381372755, 
12.286717469898786, 2338.4635584393895, 11.176093324700455, 10.757563267660496],
"eval_len": [23, 23, 221, 204, 24, 25, 22, 609, 22, 23]}

 91%|█████████ | 909998/1000000 [10:15:06<46:00, 32.61it/s]global step 910000, trans_decision ep_re 325.1753789478558

{"global_step": 910000, "eval_re": [125.05008353318208, 11.81150480975466, 
12.060080027991841, 11.55843147187987, 163.2716865691129, 136.17943330499963, 
126.05544464386782, 404.4534763047483, 219.04215069546248, 2042.2714981175584], 
"eval_len": [80, 22, 22, 22, 100, 84, 79, 198, 120, 496]}

 92%|█████████▏| 919996/1000000 [10:21:51<41:04, 32.46it/s]global step 920000, trans_decision ep_re 1189.1557471540375

{"global_step": 920000, "eval_re": [11.693910104019654, 232.75800725930424, 
333.1138269989403, 125.16376896289786, 4357.355841358418, 264.09356891784915, 
1496.2054792804167, 4219.370460290202, 202.12787886208963, 649.6747295062371], 
"eval_len": [23, 122, 156, 81, 960, 127, 368, 834, 105, 220]}

 93%|█████████▎| 929997/1000000 [10:28:41<36:27, 32.00it/s]global step 930000, trans_decision ep_re 1071.9407770863454

{"global_step": 930000, "eval_re": [486.9832539172048, 12.431882206908858, 
273.34198736048813, 4891.5072740716305, 304.27036820371654, 3937.246654907039, 
12.23374643478414, 12.65512740752809, 634.9590720202639, 153.77840433389105], 
"eval_len": [176, 22, 145, 952, 139, 790, 24, 23, 268, 101]}

 94%|█████████▍| 939999/1000000 [10:35:40<30:37, 32.65it/s]global step 940000, trans_decision ep_re 880.0582853192276

{"global_step": 940000, "eval_re": [12.13798419883313, 636.6072688305956, 
12.71687834670792, 12.281213263499495, 2817.2053249177934, 4552.069760023226, 
455.87514301925586, 13.434173690020165, 277.78518787082334, 10.469919031522807],
"eval_len": [24, 250, 23, 24, 604, 963, 187, 23, 142, 21]}

 95%|█████████▍| 949997/1000000 [10:42:30<25:33, 32.61it/s]global step 950000, trans_decision ep_re 2050.3362600989976

{"global_step": 950000, "eval_re": [2920.0757133070906, 4146.271269422664, 
11.348994651706962, 3120.026207924808, 13.853869641385351, 2171.57503699912, 
13.674765127006316, 5036.909308404125, 3055.5601738826617, 14.067261629408153], 
"eval_len": [584, 826, 22, 678, 23, 557, 23, 1000, 677, 24]}

 96%|█████████▌| 959997/1000000 [10:49:12<20:28, 32.57it/s]global step 960000, trans_decision ep_re 1297.9395532528401

{"global_step": 960000, "eval_re": [12.938948843173698, 2262.7138496623093, 
11.611171129334565, 2891.3684420745767, 1443.4545122391432, 12.83339887705497, 
3083.790403192678, 2931.031364049169, 10.798322256721905, 318.8551202042407], 
"eval_len": [23, 475, 23, 622, 362, 23, 631, 623, 21, 137]}

 97%|█████████▋| 969999/1000000 [10:56:01<15:28, 32.32it/s]global step 970000, trans_decision ep_re 957.8375838431268

{"global_step": 970000, "eval_re": [12.281155680944638, 530.9671763427732, 
2318.7466320843314, 2871.964927250707, 12.366141848329374, 1378.539762537741, 
14.56673835811799, 1096.0063176368699, 183.87715304555402, 1159.059833645899], 
"eval_len": [23, 192, 515, 624, 22, 375, 26, 297, 102, 340]}

 98%|█████████▊| 979998/1000000 [11:02:51<10:18, 32.36it/s]global step 980000, trans_decision ep_re 916.9374294294979

{"global_step": 980000, "eval_re": [393.44898012634854, 343.05307933548136, 
668.027762867697, 2974.5422826232157, 148.119552885466, 154.04754127205388, 
2487.685286427755, 220.82134188588196, 1417.5737829477007, 362.05468392337883], 
"eval_len": [176, 146, 258, 653, 93, 136, 573, 130, 370, 154]}

 99%|█████████▉| 989999/1000000 [11:10:00<05:12, 32.02it/s]global step 990000, trans_decision ep_re 2008.4313058880894

{"global_step": 990000, "eval_re": [3495.593789631374, 1620.6201007007278, 
208.16713937551427, 5205.295308761557, 947.471582066227, 3079.4675522528833, 
2071.8211065271926, 2400.5812285147026, 1044.0078692035045, 11.287381847205513],
"eval_len": [759, 389, 115, 1000, 265, 678, 457, 535, 275, 24]}

100%|█████████▉| 999997/1000000 [11:16:50<00:00, 32.06it/s]global step 1000000, trans_decision ep_re 1262.8448505755214

{"global_step": 1000000, "eval_re": [11.088163978643715, 11.694613113615075, 
533.6941518469204, 11.387767642700368, 1776.833287977975, 302.80872195531504, 
13.026483185853495, 706.4002745700077, 4276.101595367365, 4985.413446116818], 
"eval_len": [23, 21, 216, 21, 403, 142, 24, 227, 859, 962]}

100%|██████████| 1000000/1000000 [11:16:54<00:00, 24.62it/s]
