
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.15
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [04:14<10:36:43, 25.91it/s]global step 10000, trans_decision ep_re 11.529808959189012

{"global_step": 10000, "eval_re": [2.395484715032012, 17.279717791031292, 
6.217134795124736, 5.226031644715481, 10.273597410557711, 23.24468707311179, 
17.47981245707358, 16.143979775611847, 4.142903250613292, 12.894740679018362], 
"eval_len": [29, 26, 18, 17, 21, 52, 41, 39, 19, 28]}

  2%|▏         | 19997/1000000 [13:00<10:37:47, 25.61it/s]global step 20000, trans_decision ep_re 53.60631615806462

{"global_step": 20000, "eval_re": [4.441506816274446, 201.94504470351688, 
131.71769696635812, 10.405326767813085, 132.34595292095835, 17.400591917557346, 
13.236055479192602, 10.53785656829405, 6.986513835366767, 7.046615605314572], 
"eval_len": [19, 125, 226, 26, 80, 35, 26, 21, 31, 18]}

  3%|▎         | 29999/1000000 [21:30<10:30:57, 25.62it/s]global step 30000, trans_decision ep_re 57.314960711051675

{"global_step": 30000, "eval_re": [68.32534176087461, 10.204337346731547, 
2.1958597194951577, 280.00999674908616, 6.8686614620669255, 15.216306253276798, 
134.1302265753886, 38.23450216184767, 9.134339928090577, 8.830035153658748], 
"eval_len": [80, 21, 18, 197, 19, 27, 123, 49, 21, 23]}

  4%|▍         | 39997/1000000 [29:53<10:24:52, 25.61it/s]global step 40000, trans_decision ep_re 12.615741613786678

{"global_step": 40000, "eval_re": [12.42770981304367, 11.478733372042491, 
6.242111268020724, 18.590608055833265, 9.060258172724097, 5.89072571496456, 
13.821510514858572, 19.499860778031735, 16.243130693163238, 12.902767755184431],
"eval_len": [31, 27, 19, 31, 30, 17, 31, 36, 26, 26]}

  5%|▍         | 49998/1000000 [38:40<10:14:04, 25.78it/s]global step 50000, trans_decision ep_re 24.791521694692026

{"global_step": 50000, "eval_re": [18.84298007687225, 7.167556419358261, 
13.489572770626118, 1.8078354501932872, 7.978420134955033, 2.0857003156689373, 
14.846340177040082, 11.57802646707567, 158.69282335207365, 11.425961783056982], 
"eval_len": [29, 19, 27, 25, 21, 14, 29, 29, 120, 27]}

  6%|▌         | 59998/1000000 [47:20<9:59:38, 26.13it/s]global step 60000, trans_decision ep_re 11.25678484695895

{"global_step": 60000, "eval_re": [15.148767645211391, 5.172844809925286, 
4.533847997404601, 14.102026941263034, 11.517050983031906, 5.889755402325488, 
18.009639968193913, 10.284563201665174, 7.476963648415705, 20.43238787215301], 
"eval_len": [28, 23, 17, 28, 24, 16, 32, 26, 22, 32]}

  7%|▋         | 69997/1000000 [56:00<10:09:19, 25.44it/s]global step 70000, trans_decision ep_re 81.34544697826803

{"global_step": 70000, "eval_re": [140.26211188244648, 17.594911433345914, 
4.905125257395403, 152.59037832478995, 11.894727291632323, 8.611060855909644, 
76.62578204846973, 3.7634313865525217, 220.6371204055756, 176.56982089656262], 
"eval_len": [100, 27, 15, 104, 25, 22, 60, 22, 175, 191]}

  8%|▊         | 79998/1000000 [1:04:22<10:02:09, 25.46it/s]global step 80000, trans_decision ep_re 30.487322111603362

{"global_step": 80000, "eval_re": [0.988758050670491, 12.807723633139512, 
5.338647846434298, 16.880605798136095, 245.81547740888394, 2.3807307950818783, 
6.633014654427813, 2.915008475471855, -0.0029066599050934297, 
11.116161113692812], "eval_len": [27, 28, 17, 29, 168, 26, 28, 19, 25, 27]}

  9%|▉         | 89999/1000000 [1:13:01<9:54:32, 25.51it/s]global step 90000, trans_decision ep_re 10.558993110928046

{"global_step": 90000, "eval_re": [3.1916758130969476, 4.436214350901958, 
18.194779666702864, 15.447598873980475, 6.945234779827082, 5.0637648918695, 
14.807698235185924, 26.178755643197622, 5.680655485011574, 5.643553369506511], 
"eval_len": [27, 17, 27, 31, 20, 24, 36, 47, 22, 19]}

 10%|▉         | 99997/1000000 [1:21:50<9:48:53, 25.47it/s]global step 100000, trans_decision ep_re 82.92986242220385

{"global_step": 100000, "eval_re": [172.49845735648742, 7.988958975408715, 
10.024933122086491, 0.435047589194629, 131.63526956131838, 123.6174611076022, 
342.1511791816035, 16.192988648959396, 9.138075567598996, 15.616253111778734], 
"eval_len": [104, 29, 26, 24, 102, 111, 213, 28, 29, 27]}

 11%|█         | 109998/1000000 [1:30:30<9:50:15, 25.13it/s]global step 110000, trans_decision ep_re 28.648195670073108

{"global_step": 110000, "eval_re": [14.913703642781444, 1.4895661727437925, 
18.527467461888538, 102.88765378803934, 12.001255076909791, 8.198759472834212, 
12.777618292011638, 2.6351931730240152, 13.293533702724655, 99.7572059177736], 
"eval_len": [26, 27, 27, 102, 30, 20, 21, 17, 29, 79]}

 12%|█▏        | 119999/1000000 [1:38:54<9:36:21, 25.45it/s]global step 120000, trans_decision ep_re 115.6601291681663

{"global_step": 120000, "eval_re": [8.056683538226832, 202.7964573742611, 
12.015676209837864, 1.8587165329201973, 180.76553832465646, 702.7771969295346, 
9.221888892111899, 17.596013810266598, 12.215748133779387, 9.297371936067973], 
"eval_len": [24, 115, 27, 14, 96, 331, 23, 30, 30, 22]}

 13%|█▎        | 129997/1000000 [1:47:34<9:30:04, 25.44it/s]global step 130000, trans_decision ep_re 44.10193674756636

{"global_step": 130000, "eval_re": [-0.984917601846319, 15.156964796054037, 
8.370104668931974, 4.070286458998536, 15.452301508003362, 8.85823318031145, 
6.454238033237936, 14.0612000086439, 356.8319643980527, 12.74899202527604], 
"eval_len": [16, 30, 24, 14, 27, 27, 22, 28, 169, 22]}

 14%|█▍        | 139998/1000000 [1:56:13<9:19:37, 25.61it/s]global step 140000, trans_decision ep_re 7.348882006825105

{"global_step": 140000, "eval_re": [3.4583400684769674, 4.760348089983212, 
4.302350819731746, 20.361985319505436, 15.89183918843924, 3.2913854060244767, 
6.146817187703417, 1.597244198745917, 10.044615131135501, 3.6338946585051426], 
"eval_len": [27, 27, 22, 29, 31, 21, 17, 19, 28, 15]}

 15%|█▍        | 149998/1000000 [2:04:54<9:13:03, 25.62it/s]global step 150000, trans_decision ep_re 18.24739941333457

{"global_step": 150000, "eval_re": [17.004609018716803, 7.222289681641795, 
106.62610649688368, 2.4521794404581603, 4.931301312037561, 6.149511658424753, 
7.694768616429193, 7.142123015575471, 16.06860542087896, 7.182499472299339], 
"eval_len": [28, 29, 96, 28, 19, 22, 24, 18, 27, 28]}

 16%|█▌        | 159999/1000000 [2:13:50<9:11:58, 25.36it/s]global step 160000, trans_decision ep_re 56.20754021123904

{"global_step": 160000, "eval_re": [2.4526630540489864, 10.25109603468188, 
9.978652917899279, 15.363046484741155, 4.382195526451228, 11.389806988649033, 
9.940449258831636, 477.5178693838214, 16.090196865659035, 4.709425597606786], 
"eval_len": [22, 23, 23, 29, 15, 28, 18, 219, 29, 26]}

 17%|█▋        | 169997/1000000 [2:22:15<9:09:10, 25.19it/s]global step 170000, trans_decision ep_re 21.87769814727131

{"global_step": 170000, "eval_re": [8.349992884647422, 13.20818153951283, 
9.58506008246163, 10.079599260334252, 9.741507798435396, 8.138815450086758, 
92.56953002631336, 32.27412430716525, 16.846541295140394, 17.983628828615796], 
"eval_len": [24, 29, 22, 28, 26, 28, 76, 50, 31, 28]}

 18%|█▊        | 179997/1000000 [2:31:10<9:05:20, 25.06it/s]global step 180000, trans_decision ep_re 38.68387421989709

{"global_step": 180000, "eval_re": [9.31793855484417, 3.2881419879203793, 
6.963204498523517, 9.522694056207458, 11.621941610434032, 4.973867182239963, 
321.6746881348526, 4.688921690838122, 7.71381723495271, 7.073527248157925], 
"eval_len": [27, 15, 24, 28, 30, 15, 241, 18, 24, 28]}

 19%|█▉        | 189999/1000000 [2:39:36<8:53:33, 25.30it/s]global step 190000, trans_decision ep_re 35.31501126698307

{"global_step": 190000, "eval_re": [5.2593734272657535, 3.0589911872546325, 
13.690999683718191, 2.9563346326966706, 8.448804024531045, 5.3984615297985075, 
12.675596206994365, 13.047264684982652, 16.84425662480146, 271.7700306677874], 
"eval_len": [17, 18, 27, 13, 20, 19, 28, 28, 27, 155]}

 20%|█▉        | 199999/1000000 [2:48:15<8:39:26, 25.67it/s]global step 200000, trans_decision ep_re 47.516171301095696

{"global_step": 200000, "eval_re": [16.65254482005496, 7.349583517923346, 
383.6571313329294, 19.86258027266257, 2.98009601273599, 5.310924635554873, 
10.704676370479593, 11.750112925037342, 9.93907835835805, 6.954984765220864], 
"eval_len": [28, 18, 263, 30, 15, 17, 31, 30, 20, 24]}

 21%|██        | 209998/1000000 [2:56:51<8:33:05, 25.66it/s]global step 210000, trans_decision ep_re 136.52772076748957

{"global_step": 210000, "eval_re": [220.8795515179351, 6.812574215206826, 
862.4818279867619, 15.350264953926352, 222.48120659654057, 11.603108607234853, 
1.3641448492769686, 1.4364087742895695, 12.28699450143591, 10.581125672287447], 
"eval_len": [129, 28, 393, 30, 143, 27, 11, 27, 28, 26]}

 22%|██▏       | 219999/1000000 [3:05:31<8:28:06, 25.58it/s]global step 220000, trans_decision ep_re 70.13003093519623

{"global_step": 220000, "eval_re": [5.907801296025105, 624.1809997788749, 
13.006513286843772, 6.267046186528295, 1.6881127037584498, 3.0509161997407195, 
3.0365262029141444, 18.891789537059577, 7.270544772682379, 18.000059387534833], 
"eval_len": [21, 252, 34, 19, 25, 17, 15, 28, 18, 31]}

 23%|██▎       | 229998/1000000 [3:14:20<8:20:50, 25.62it/s]global step 230000, trans_decision ep_re 100.63215019187692

{"global_step": 230000, "eval_re": [8.965007594998097, 5.087353227895786, 
5.579762882200064, 10.81015584064208, 2.640387667007322, 9.276663742852119, 
96.44537076313418, 50.14733021352875, 4.170445702375639, 813.1990242841351], 
"eval_len": [18, 16, 20, 26, 16, 23, 95, 78, 21, 329]}

 24%|██▍       | 239997/1000000 [3:22:45<8:12:03, 25.74it/s]global step 240000, trans_decision ep_re 40.595068629318845

{"global_step": 240000, "eval_re": [6.7258064031035785, 19.25579836305677, 
16.61018173226315, 7.725168719997187, 53.075198763899344, 11.861780885292761, 
11.091321652358202, 10.390553556799892, 7.444459935398464, 261.7704162810191], 
"eval_len": [27, 29, 26, 23, 63, 25, 27, 24, 29, 137]}

 25%|██▍       | 249998/1000000 [3:31:22<8:09:49, 25.52it/s]global step 250000, trans_decision ep_re 64.20232732829257

{"global_step": 250000, "eval_re": [3.545951262308944, 2.9539048741017613, 
443.79048585585076, 12.379456312068644, 4.321547923995144, 3.1845470171529033, 
12.688074162326366, 144.95117878835086, 3.305295019937811, 10.902832066832508], 
"eval_len": [16, 13, 209, 22, 15, 15, 29, 132, 24, 22]}

 26%|██▌       | 259999/1000000 [3:40:10<8:01:54, 25.59it/s]global step 260000, trans_decision ep_re 34.550590681179706

{"global_step": 260000, "eval_re": [11.954460958439052, 16.23229802493781, 
15.313351459593035, 12.616721092794203, 4.67509296084963, 9.224394343494655, 
255.46608854803617, 8.903285631677127, 3.684535376954301, 7.435678415021062], 
"eval_len": [25, 29, 26, 32, 17, 23, 136, 26, 24, 27]}

 27%|██▋       | 269999/1000000 [3:48:35<7:54:35, 25.64it/s]global step 270000, trans_decision ep_re 27.4777792411573

{"global_step": 270000, "eval_re": [1.8393970616518451, 1.4703990466961383, 
147.46737726147197, 4.445464654292362, 40.486950078958955, 8.145537267115015, 
5.013870188155103, 1.5477994937049677, 57.16877638741863, 7.192220972108008], 
"eval_len": [17, 20, 159, 30, 57, 28, 15, 15, 77, 27]}

 28%|██▊       | 279999/1000000 [3:57:11<7:51:35, 25.45it/s]global step 280000, trans_decision ep_re 137.68148248097629

{"global_step": 280000, "eval_re": [6.153180558315525, 135.8248030606909, 
3.967960585853594, 251.2511316064734, 522.928181299693, 6.72057447112646, 
14.057273185598143, 415.48837683270676, 13.108740952033795, 7.3146022572712255],
"eval_len": [17, 150, 21, 201, 241, 21, 31, 181, 26, 26]}

 29%|██▉       | 289997/1000000 [4:06:00<7:40:21, 25.70it/s]global step 290000, trans_decision ep_re 111.02846597332523

{"global_step": 290000, "eval_re": [12.58008835596548, 49.93757098051888, 
7.229119546811127, 843.8696226276294, 11.200571423397436, 67.50033181939311, 
91.58179988426127, 14.169606973617832, 7.201382058030811, 5.014566063626887], 
"eval_len": [23, 59, 17, 469, 28, 80, 99, 26, 26, 15]}

 30%|██▉       | 299997/1000000 [4:14:22<7:34:55, 25.65it/s]global step 300000, trans_decision ep_re 106.34045217720855

{"global_step": 300000, "eval_re": [2.8613919153783223, 4.149298476649822, 
4.589776990468408, 773.9692607153469, 10.901143077409868, 9.711331461852499, 
14.20679579058473, 8.677233568419826, 221.2423106923545, 13.095979083620668], 
"eval_len": [15, 25, 18, 353, 21, 37, 24, 25, 325, 27]}

 31%|███       | 309999/1000000 [4:23:10<7:25:34, 25.81it/s]global step 310000, trans_decision ep_re 146.1665722441349

{"global_step": 310000, "eval_re": [12.50374652408798, 443.3077435733001, 
716.9717306085366, 21.183579673387577, 2.4172751196377593, 6.704178125910428, 
10.600519278765947, 8.998704845318112, 14.032112649879567, 224.94613204252468], 
"eval_len": [26, 189, 315, 30, 14, 19, 26, 28, 27, 174]}

 32%|███▏      | 319998/1000000 [4:31:31<7:17:00, 25.93it/s]global step 320000, trans_decision ep_re 63.491954011795805

{"global_step": 320000, "eval_re": [13.225039035455369, 9.575117283364683, 
262.29081358489515, 265.54097039846414, 6.394036738324398, 6.234558725431664, 
58.80825199852145, 5.92517157174356, 4.467569853171513, 2.458010928586124], 
"eval_len": [24, 24, 141, 134, 19, 21, 96, 22, 18, 15]}

 33%|███▎      | 329998/1000000 [4:40:05<7:08:30, 26.06it/s]global step 330000, trans_decision ep_re 50.90462346857013

{"global_step": 330000, "eval_re": [10.094004200306493, 216.77176820648327, 
10.730772740366659, 187.92301145220284, 9.576353787326521, 2.6539735233216026, 
48.47375681393966, 6.063771732841664, 14.779038191806416, 1.9797840371061053], 
"eval_len": [32, 116, 25, 149, 25, 19, 58, 18, 24, 16]}

 34%|███▍      | 339999/1000000 [4:48:50<7:04:50, 25.89it/s]global step 340000, trans_decision ep_re 80.09954966637329

{"global_step": 340000, "eval_re": [4.875855223033295, 501.6816243678803, 
20.08501090527798, 6.69830107719701, 5.221339080386416, 11.451328272197326, 
222.05263065974862, 7.012486746707878, 6.106146106218279, 15.810774225085837], 
"eval_len": [17, 291, 29, 19, 22, 25, 119, 26, 19, 24]}

 35%|███▍      | 349999/1000000 [4:57:11<6:56:12, 26.03it/s]global step 350000, trans_decision ep_re 50.44338346658971

{"global_step": 350000, "eval_re": [5.290448477968916, 9.82954040532487, 
2.5854873583654125, 8.222059862190491, 9.126445744021824, 156.22315379751586, 
6.753223507245449, 8.302480933634389, 6.118658686575021, 291.9823358930548], 
"eval_len": [17, 25, 18, 17, 22, 103, 17, 27, 25, 158]}

 36%|███▌      | 359999/1000000 [5:05:43<6:51:00, 25.95it/s]global step 360000, trans_decision ep_re 129.42310204188135

{"global_step": 360000, "eval_re": [600.8130522275094, -0.44461170692974394, 
58.87056483394744, 393.87996427831325, 3.353440766620207, 5.35530276111205, 
220.76308448295444, 11.35212004490725, 1.5688083964534607, -1.2807056660741507],
"eval_len": [240, 15, 68, 173, 19, 19, 121, 27, 18, 15]}

 37%|███▋      | 369999/1000000 [5:14:30<6:45:11, 25.91it/s]global step 370000, trans_decision ep_re 125.60813175659976

{"global_step": 370000, "eval_re": [277.0223285894735, 417.4951533273936, 
12.093824735984697, 7.000399424285002, 1.1296295867758173, 11.777412469438069, 
6.582655640495312, 345.9431631475327, 167.12223369697472, 9.914516947644266], 
"eval_len": [170, 194, 23, 24, 21, 22, 24, 157, 99, 20]}

 38%|███▊      | 379997/1000000 [5:22:53<6:41:57, 25.71it/s]global step 380000, trans_decision ep_re 165.01763845892364

{"global_step": 380000, "eval_re": [517.3804714793957, 19.536385765135787, 
12.417124194193851, 300.9094751298221, 7.433746282978602, 425.8984117471466, 
11.427363652701924, 230.65391632642192, 119.48244559200617, 5.037044419433637], 
"eval_len": [249, 41, 37, 168, 18, 182, 26, 152, 119, 18]}

 39%|███▉      | 389998/1000000 [5:31:40<6:31:02, 26.00it/s]global step 390000, trans_decision ep_re 28.8573703971331

{"global_step": 390000, "eval_re": [71.17752270802617, 10.580002049217299, 
9.25742414711668, 8.424222278222649, 2.778370637492491, 77.67461690115242, 
20.405695820118, 70.91425802284492, 13.121782616353046, 4.23980879078724], 
"eval_len": [64, 22, 25, 17, 24, 79, 28, 72, 23, 25]}

 40%|███▉      | 399998/1000000 [5:40:02<6:24:04, 26.04it/s]global step 400000, trans_decision ep_re 283.57744139065414

{"global_step": 400000, "eval_re": [12.054138137814581, 440.1670350738123, 
175.85112809492693, 7.2390270957919665, 16.17133168597908, 240.04737153244082, 
207.46534395859453, 6.216259180066919, 639.3200103465815, 1091.242768800533], 
"eval_len": [24, 278, 254, 18, 27, 138, 116, 20, 318, 417]}

 41%|████      | 409999/1000000 [5:48:50<6:21:03, 25.81it/s]global step 410000, trans_decision ep_re 120.2522119150165

{"global_step": 410000, "eval_re": [5.044772447678936, 355.02380037407033, 
8.562115613316065, 3.2372905707863566, 1.3984077569187723, 59.956174763565535, 
5.9704132519336515, 14.396430330439664, 521.722239012372, 227.21047502908357], 
"eval_len": [23, 154, 22, 24, 13, 68, 17, 25, 205, 137]}

 42%|████▏     | 419997/1000000 [5:57:13<6:18:11, 25.56it/s]global step 420000, trans_decision ep_re 132.97033517649348

{"global_step": 420000, "eval_re": [14.509058063440188, 6.801271894272159, 
8.512757492298885, 24.922090121518135, 319.10208988116943, 17.700671983110038, 
208.7519563965716, 14.699570308467722, 78.57072435862716, 636.1331612654593], 
"eval_len": [27, 19, 23, 48, 184, 25, 128, 39, 81, 234]}

 43%|████▎     | 429999/1000000 [6:06:00<6:04:15, 26.08it/s]global step 430000, trans_decision ep_re 31.622706791561654

{"global_step": 430000, "eval_re": [3.9042426650595914, 12.911191074005439, 
28.01412426685414, 9.344964583337541, -0.9048602186397254, 178.899196712473, 
54.591528202848586, 6.6142870632847925, 14.034403504069466, 8.817990062323688], 
"eval_len": [15, 25, 50, 20, 11, 99, 75, 17, 25, 22]}

 44%|████▍     | 439999/1000000 [6:14:30<6:04:17, 25.62it/s]global step 440000, trans_decision ep_re 68.18840245043026

{"global_step": 440000, "eval_re": [3.8202984378465485, 55.756100362449246, 
10.81638132210912, 5.228646995965293, 13.007239644726134, 407.01030781829587, 
89.03636756199397, 77.26376119980155, 12.667966501519654, 7.276954659595155], 
"eval_len": [18, 72, 40, 15, 36, 171, 78, 84, 21, 22]}

 45%|████▍     | 449997/1000000 [6:22:53<5:56:52, 25.69it/s]global step 450000, trans_decision ep_re 88.05681070578126

{"global_step": 450000, "eval_re": [7.769115901783388, 50.77355065015938, 
7.8752830795204005, 4.690625096034223, 177.78555801409897, 6.288547101985476, 
13.121149511575625, 8.844996155534556, 590.7183445887238, 12.700936958396834], 
"eval_len": [19, 90, 21, 16, 110, 27, 23, 26, 193, 39]}

 46%|████▌     | 459999/1000000 [6:31:40<5:46:30, 25.97it/s]global step 460000, trans_decision ep_re 35.818060517835725

{"global_step": 460000, "eval_re": [174.2977859073107, 1.6921172682702617, 
9.611320717762322, 11.031118280694109, 5.300389329792307, 116.57368352987432, 
5.640735838374288, 13.149943619486356, 11.139858057096918, 9.743652629695674], 
"eval_len": [111, 12, 25, 36, 15, 100, 24, 41, 26, 24]}

 47%|████▋     | 469999/1000000 [6:40:01<5:42:49, 25.77it/s]global step 470000, trans_decision ep_re 15.04068527252699

{"global_step": 470000, "eval_re": [8.919439592352626, 7.168566574219239, 
4.965777083694002, 15.581541412642434, 81.02342485166206, 11.967174923609617, 
0.6376557219432416, 3.9809703299280472, 3.662673832519965, 12.499628402698656], 
"eval_len": [19, 22, 27, 37, 70, 27, 26, 16, 18, 23]}

 48%|████▊     | 479997/1000000 [6:48:33<5:34:59, 25.87it/s]global step 480000, trans_decision ep_re 29.747276878903875

{"global_step": 480000, "eval_re": [3.90957199466637, 12.83924862310595, 
13.272650505701383, 7.021219010242171, 22.4955024936205, 9.207541684976928, 
5.324801269227083, 214.39887523989262, 8.401055494663275, 0.6023024729424066], 
"eval_len": [13, 39, 25, 20, 48, 27, 20, 126, 22, 16]}

 49%|████▉     | 489998/1000000 [6:57:20<5:24:41, 26.18it/s]global step 490000, trans_decision ep_re 30.713612499284046

{"global_step": 490000, "eval_re": [22.64391332235418, 14.651278443827167, 
1.1280904288267324, 76.96018624933025, 7.877472952364646, 30.21646580159995, 
6.947899716950944, 32.78377143802705, 107.27734423138614, 6.649702408173441], 
"eval_len": [47, 26, 22, 70, 19, 45, 34, 50, 103, 20]}

 50%|████▉     | 499999/1000000 [7:05:50<5:23:11, 25.78it/s]global step 500000, trans_decision ep_re 61.13369354749905

{"global_step": 500000, "eval_re": [29.09139915147982, 22.152718763872343, 
12.293805064121747, 26.315879114544718, 29.44203374288128, 98.06314124593891, 
13.903766000044232, 364.58738500733176, 12.453799131372483, 3.033008253403114], 
"eval_len": [47, 45, 26, 46, 50, 85, 36, 145, 40, 14]}

 51%|█████     | 509997/1000000 [7:14:14<5:15:46, 25.86it/s]global step 510000, trans_decision ep_re 86.67876489914445

{"global_step": 510000, "eval_re": [6.6958202828150055, 605.9949583671429, 
8.987491197271869, 102.74176377495421, 10.584783811219491, 46.451356657748676, 
58.061717664858264, 11.740118731682095, 12.9360249850299, 2.593613518722103], 
"eval_len": [22, 205, 44, 91, 24, 55, 80, 27, 26, 36]}

 52%|█████▏    | 519999/1000000 [7:23:00<5:10:35, 25.76it/s]global step 520000, trans_decision ep_re 10.70669919914181

{"global_step": 520000, "eval_re": [11.471055935060296, -0.09207073058788859, 
5.8932005487015156, 9.586395519907942, 7.313780804608276, 7.821515661652846, 
8.477252903041306, 8.205255924847755, 3.6779952380477887, 44.712610186138264], 
"eval_len": [21, 17, 22, 25, 22, 23, 20, 25, 31, 52]}

 53%|█████▎    | 529999/1000000 [7:31:30<5:02:57, 25.86it/s]global step 530000, trans_decision ep_re 16.485677830945416

{"global_step": 530000, "eval_re": [39.9296458175631, 18.84226629050888, 
9.92647191693827, 37.01769455990238, 20.06152414224247, 8.846917316206964, 
10.434963192043773, 1.984634549054904, 2.945128129137247, 14.867532395856191], 
"eval_len": [51, 45, 21, 53, 45, 19, 39, 12, 30, 46]}

 54%|█████▍    | 539997/1000000 [7:39:52<4:58:21, 25.70it/s]global step 540000, trans_decision ep_re 13.661385611219538

{"global_step": 540000, "eval_re": [11.626776615842816, 8.387572006143222, 
10.72389138365727, 6.8463804169492875, 26.162272275794738, 42.69574942140941, 
6.23722868040924, 7.246155723974486, 8.173899775159361, 8.513929812855535], 
"eval_len": [23, 21, 24, 17, 44, 50, 23, 18, 22, 22]}

 55%|█████▍    | 549998/1000000 [7:48:25<4:47:35, 26.08it/s]global step 550000, trans_decision ep_re 49.54002109384994

{"global_step": 550000, "eval_re": [16.280073315340314, 3.5754693385817955, 
154.0320949198212, 237.4402805791079, 11.068829745964983, 41.71744397130766, 
6.450312012122862, 6.455794132642989, 6.651222364190421, 11.72869055941931], 
"eval_len": [44, 15, 116, 126, 34, 64, 32, 22, 20, 22]}

 56%|█████▌    | 559999/1000000 [7:57:10<4:45:12, 25.71it/s]global step 560000, trans_decision ep_re 36.76055114189715

{"global_step": 560000, "eval_re": [110.66517092065213, 5.729936885295874, 
106.30193318500358, 8.025374800225814, 7.903080832242502, 95.49213144354992, 
14.240850308153123, 9.442785757500303, 0.1371573118282693, 9.667089974519984], 
"eval_len": [87, 21, 88, 22, 21, 75, 25, 23, 14, 23]}

 57%|█████▋    | 569998/1000000 [8:05:32<4:34:51, 26.07it/s]global step 570000, trans_decision ep_re 76.16212813143082

{"global_step": 570000, "eval_re": [23.244081140322507, 272.295215111347, 
208.89047888733822, 102.3098845457939, 10.7923566553802, 40.20948150748242, 
22.256145509523538, 12.39726123763369, 69.38031230441705, -0.15393558493033585],
"eval_len": [47, 120, 123, 82, 23, 52, 40, 26, 69, 15]}

 58%|█████▊    | 579999/1000000 [8:14:20<4:34:42, 25.48it/s]global step 580000, trans_decision ep_re 49.114951514281714

{"global_step": 580000, "eval_re": [6.132265311481889, 173.63999774063475, 
4.400526026672595, 2.1070286163247114, 6.045897615831488, 36.4904981485864, 
24.41327529683936, 77.35619517443202, 83.23104688385436, 77.33278432815965], 
"eval_len": [20, 93, 18, 14, 17, 51, 44, 68, 67, 70]}

 59%|█████▉    | 589999/1000000 [8:22:45<4:25:08, 25.77it/s]global step 590000, trans_decision ep_re 16.797875878397413

{"global_step": 590000, "eval_re": [12.031735685986861, 5.896652845941922, 
13.641343766821961, 3.666350733600489, 16.224297697607327, 0.7061156363958082, 
22.631125108423106, 7.498874509878012, 5.068106106505858, 80.61415669281281], 
"eval_len": [26, 17, 41, 13, 38, 15, 41, 23, 18, 66]}

 60%|█████▉    | 599999/1000000 [8:31:30<4:19:46, 25.66it/s]global step 600000, trans_decision ep_re 104.69997533899195

{"global_step": 600000, "eval_re": [11.81672777807957, 35.75280450130984, 
8.362088811931491, 8.528331915580598, 574.9022504026623, 6.436187755675624, 
3.6615703258406462, 381.03848573023174, 8.474347693198633, 8.026958475409234], 
"eval_len": [42, 52, 23, 26, 253, 18, 16, 173, 24, 23]}

 61%|██████    | 609997/1000000 [8:39:55<4:12:49, 25.71it/s]global step 610000, trans_decision ep_re 18.44706147642037

{"global_step": 610000, "eval_re": [4.096106761877481, 10.851372134415012, 
5.351797473739416, 4.605371560730405, 10.794026546347311, 7.88523465611391, 
111.24369425457378, 2.5001232963426707, 6.639167932875384, 20.503720147188346], 
"eval_len": [17, 23, 31, 17, 23, 19, 74, 13, 18, 40]}

 62%|██████▏   | 619999/1000000 [8:48:40<4:06:39, 25.68it/s]global step 620000, trans_decision ep_re 116.12659461378257

{"global_step": 620000, "eval_re": [11.932380593301335, 11.923891381631515, 
447.5647777957643, 1.270490064083369, 8.059265964043544, 8.561323637217011, 
9.054871906133362, 16.40511151915162, 314.76487525766845, 331.7289580188311], 
"eval_len": [36, 23, 201, 20, 22, 22, 43, 42, 164, 145]}

 63%|██████▎   | 629998/1000000 [8:57:20<3:57:37, 25.95it/s]global step 630000, trans_decision ep_re 19.632053077529093

{"global_step": 630000, "eval_re": [7.487113432647299, 5.983540311875018, 
5.574440146453468, 123.39797718229626, 3.02595304189212, 11.643571504188644, 
20.742562652926026, 5.060259941331681, 9.494212842055655, 3.910899719624789], 
"eval_len": [34, 17, 20, 77, 16, 21, 40, 15, 36, 24]}

 64%|██████▍   | 639998/1000000 [9:05:42<3:51:23, 25.93it/s]global step 640000, trans_decision ep_re 52.31656023016552

{"global_step": 640000, "eval_re": [7.365805639393281, 10.548487317151508, 
242.54183403331155, 29.078351321381042, 7.117290441548116, 3.872756665109275, 
76.01530237894087, 6.305048829501528, 133.1928613657182, 7.127864309599883], 
"eval_len": [22, 26, 175, 51, 21, 18, 71, 22, 83, 22]}

 65%|██████▍   | 649998/1000000 [9:14:30<3:43:46, 26.07it/s]global step 650000, trans_decision ep_re 184.63516024372416

{"global_step": 650000, "eval_re": [26.765975178530166, 22.55598541961026, 
1177.6700427117307, 7.527214723124276, 7.4198265040451385, 22.58796007109841, 
2.6350568066331412, 13.77542042883655, 556.2258499486668, 9.188270644966188], 
"eval_len": [43, 39, 427, 21, 25, 41, 13, 36, 246, 21]}

 66%|██████▌   | 659997/1000000 [9:22:54<3:42:47, 25.43it/s]global step 660000, trans_decision ep_re 173.95404081339385

{"global_step": 660000, "eval_re": [5.6513799377320435, 202.26626255660153, 
295.62862337953993, 7.483913022317366, 81.69882843199203, 21.29682642057485, 
14.063866833148875, 10.100014278579962, 273.64861725014134, 827.7020760233104], 
"eval_len": [16, 110, 136, 20, 92, 41, 24, 23, 127, 338]}

 67%|██████▋   | 669999/1000000 [9:31:40<3:33:23, 25.77it/s]global step 670000, trans_decision ep_re 133.81433680434915

{"global_step": 670000, "eval_re": [77.71512344474705, 513.9039054865035, 
14.843915094881176, 4.596512171637476, 7.62829261013459, 60.83185344005014, 
5.24045485841328, 631.4873613403586, 16.207086551349565, 5.688863045415825], 
"eval_len": [84, 179, 39, 16, 25, 60, 29, 245, 37, 16]}

 68%|██████▊   | 679997/1000000 [9:40:20<3:25:26, 25.96it/s]global step 680000, trans_decision ep_re 42.357541478025425

{"global_step": 680000, "eval_re": [18.145028158487502, 16.791833460923634, 
10.97538635714261, 13.170084237881996, 247.551602193187, 2.316503160389503, 
10.151406968233823, 45.042600047751904, 26.18838047636136, 33.242589719894944], 
"eval_len": [41, 39, 22, 38, 136, 16, 22, 82, 48, 47]}

 69%|██████▉   | 689997/1000000 [9:48:45<3:23:20, 25.41it/s]global step 690000, trans_decision ep_re 53.51684792984349

{"global_step": 690000, "eval_re": [17.643888258734773, 11.725670922015127, 
6.137801184806097, 6.278938326883557, 333.8059402605588, 5.49812034149764, 
127.16172617176724, 3.596030679865777, 15.562111639804758, 7.758251512500998], 
"eval_len": [42, 38, 19, 21, 172, 19, 112, 24, 38, 38]}

 70%|██████▉   | 699999/1000000 [9:57:25<3:10:48, 26.20it/s]global step 700000, trans_decision ep_re 144.41263292643032

{"global_step": 700000, "eval_re": [22.75003144284794, 8.094699957153594, 
1055.2757100852411, 293.63736469095363, 5.91617264068475, 31.38962953048948, 
10.007134174722736, 4.692921963196294, 6.315366501652526, 6.047298277361076], 
"eval_len": [40, 36, 329, 214, 23, 44, 23, 15, 19, 33]}

 71%|███████   | 709999/1000000 [10:06:10<3:06:32, 25.91it/s]global step 710000, trans_decision ep_re 7.653970908677321

{"global_step": 710000, "eval_re": [0.42949051942812416, 2.3727557527539536, 
11.035931584816998, 6.622594768177156, 7.139245123121407, 18.160182336048372, 
9.638379669837182, 5.747153968055376, 5.832371418673223, 9.56160394586142], 
"eval_len": [12, 33, 23, 25, 18, 41, 23, 18, 20, 25]}

 72%|███████▏  | 719998/1000000 [10:14:32<2:57:49, 26.24it/s]global step 720000, trans_decision ep_re 104.65026674637895

{"global_step": 720000, "eval_re": [5.727791524661916, 27.487195534590484, 
3.9716747667520393, 41.79595037629868, 169.57214492630942, 731.0613615568553, 
22.446588425598637, 7.241769694199035, 15.818864400798, 21.37932625772606], 
"eval_len": [29, 42, 36, 60, 117, 243, 45, 18, 41, 43]}

 73%|███████▎  | 729998/1000000 [10:23:06<2:51:55, 26.17it/s]global step 730000, trans_decision ep_re 22.954421953481717

{"global_step": 730000, "eval_re": [8.892250820951151, 30.929501040916026, 
26.526996176740088, 4.479270362842782, 114.21200166307416, 14.546411802278975, 
11.109410589451103, 4.896230108304078, 7.4087254092523125, 6.543421561006483], 
"eval_len": [23, 45, 45, 18, 73, 40, 25, 28, 22, 22]}

 74%|███████▍  | 739999/1000000 [10:31:41<2:49:27, 25.57it/s]global step 740000, trans_decision ep_re 82.3484745738067

{"global_step": 740000, "eval_re": [32.48291597794957, 4.273966767398365, 
7.131252456839187, 3.6839848231321053, 150.7136767657453, 19.443958203318864, 
8.849187447935474, 24.2117899007072, 178.9297261875953, 393.7642872074457], 
"eval_len": [78, 18, 17, 20, 117, 42, 27, 53, 98, 184]}

 75%|███████▍  | 749997/1000000 [10:40:16<2:41:05, 25.87it/s]global step 750000, trans_decision ep_re 18.53574144809637

{"global_step": 750000, "eval_re": [31.142476155418333, 6.9029950864869996, 
4.676132757060993, 7.912612723016029, 11.460143873529923, 2.529598145690823, 
77.08366667092261, 17.491314211390634, 11.989028924282717, 14.16944593316466], 
"eval_len": [45, 25, 26, 21, 27, 16, 89, 38, 25, 24]}

 76%|███████▌  | 759999/1000000 [10:49:00<2:34:17, 25.92it/s]global step 760000, trans_decision ep_re 48.461132716580266

{"global_step": 760000, "eval_re": [4.16090626530391, 159.11409520796025, 
4.825711039630892, 13.07819109636254, 170.5657446055767, 37.01967259592534, 
4.637233625677266, 35.969760857379754, 18.780563271085388, 36.459448600900664], 
"eval_len": [24, 159, 17, 41, 119, 51, 20, 47, 42, 51]}

 77%|███████▋  | 769998/1000000 [10:57:22<2:26:52, 26.10it/s]global step 770000, trans_decision ep_re 56.45737333245968

{"global_step": 770000, "eval_re": [9.154828819637599, 32.427466887563895, 
7.202583428195879, 43.336953346762265, 5.969914063667317, 9.6097519671679, 
28.18413317165927, 4.244533618568323, 8.34262813574589, 416.10093988562846], 
"eval_len": [19, 52, 30, 56, 36, 25, 43, 16, 17, 214]}

 78%|███████▊  | 779998/1000000 [11:06:10<2:22:49, 25.67it/s]global step 780000, trans_decision ep_re 123.21084739101818

{"global_step": 780000, "eval_re": [8.385933377903358, 4.154941248145846, 
5.447311980977192, 1112.8092032150332, 2.0316902583386054, 4.800008740030284, 
1.3474642006032649, 52.88665710121957, 28.398357901197045, 11.84690588673354], 
"eval_len": [37, 27, 21, 362, 12, 16, 16, 52, 49, 29]}

 79%|███████▉  | 789999/1000000 [11:14:40<2:14:08, 26.09it/s]global step 790000, trans_decision ep_re 75.0851693379884

{"global_step": 790000, "eval_re": [629.271346070085, 29.213174524067096, 
16.0475503048124, 11.173484614306856, 18.32255151507228, 13.37556389738758, 
8.771979391055313, 8.942055787103238, 8.427803304236011, 7.306183971758306], 
"eval_len": [236, 47, 41, 21, 41, 37, 24, 22, 19, 23]}

 80%|███████▉  | 799999/1000000 [11:23:10<2:07:27, 26.15it/s]global step 800000, trans_decision ep_re 155.4681458573597

{"global_step": 800000, "eval_re": [7.088223192126889, 55.45079864316045, 
3.8650191497937976, 3.482121131026087, 12.307581756687915, 7.86948599774507, 
267.310390150263, 9.483475130944553, 15.951795066251883, 1171.8725683555972], 
"eval_len": [20, 83, 16, 17, 26, 17, 131, 19, 26, 327]}

 81%|████████  | 809999/1000000 [11:31:26<2:01:38, 26.03it/s]global step 810000, trans_decision ep_re 32.330157141743534

{"global_step": 810000, "eval_re": [23.865234206982397, 2.4591210236592578, 
12.213447486600096, 12.73168003054458, 7.281584242749405, 217.52518617952535, 
8.289527931427074, 16.63759064292322, 9.108032237580654, 13.190167435443374], 
"eval_len": [41, 13, 25, 39, 23, 107, 19, 45, 22, 24]}

 82%|████████▏ | 819997/1000000 [11:40:05<1:58:01, 25.42it/s]global step 820000, trans_decision ep_re 72.41223152425418

{"global_step": 820000, "eval_re": [8.17150056766569, 8.23765600707749, 
13.001352254199595, 6.569325740289928, 315.52612252652784, 12.071136970785096, 
340.1476885641944, 0.6221221791360398, 16.387081093648092, 3.3883293390175417], 
"eval_len": [22, 26, 26, 28, 169, 25, 152, 15, 40, 21]}

 83%|████████▎ | 829999/1000000 [11:48:50<1:51:10, 25.49it/s]global step 830000, trans_decision ep_re 93.62566047798927

{"global_step": 830000, "eval_re": [6.254289306023651, 17.56423097872364, 
3.026724970528497, 9.105718021889928, 172.06621261697111, 7.148335161602757, 
16.329737698764145, 24.58460417356212, 676.3288854763332, 3.8478663754935973], 
"eval_len": [26, 43, 21, 21, 98, 20, 40, 43, 212, 27]}

 84%|████████▍ | 839999/1000000 [11:57:21<1:44:55, 25.41it/s]global step 840000, trans_decision ep_re 54.58223511215105

{"global_step": 840000, "eval_re": [8.89787607593793, 9.452994003302118, 
437.037895826263, 11.319816683068387, 7.134913211378569, 31.305984487902922, 
4.263497702166932, 16.66936525400546, 12.204262699977264, 7.535745177507958], 
"eval_len": [26, 26, 174, 23, 18, 49, 19, 39, 22, 23]}

 85%|████████▍ | 849998/1000000 [12:05:54<1:34:31, 26.45it/s]global step 850000, trans_decision ep_re 212.3647561579889

{"global_step": 850000, "eval_re": [7.0107576497542565, 1317.8198586950268, 
8.442006567679611, 2.813830853274139, 2.947898462357508, 10.16306279110726, 
4.111533625213457, 264.4130810520773, 498.4564192392832, 7.4691126441155475], 
"eval_len": [22, 372, 32, 21, 21, 21, 24, 114, 199, 28]}

 86%|████████▌ | 859998/1000000 [12:14:24<1:28:20, 26.41it/s]global step 860000, trans_decision ep_re 87.82100591052549

{"global_step": 860000, "eval_re": [1.3187152975770942, 8.652136382243375, 
546.5081071698577, 4.872923728105238, 53.9427444036405, 5.128004648875796, 
229.53704974464188, 14.561512568513244, 11.376367257611447, 2.3124979041885325],
"eval_len": [14, 18, 189, 34, 57, 22, 139, 37, 35, 18]}

 87%|████████▋ | 869998/1000000 [12:22:55<1:21:10, 26.69it/s]global step 870000, trans_decision ep_re 31.986948773128738

{"global_step": 870000, "eval_re": [5.694860170047468, 1.6169001045867912, 
3.6574510446738193, 168.06166243639777, 8.044870608891966, 8.255314930675572, 
14.122382786971766, 94.78976308924504, 7.606903255302908, 8.019379304494306], 
"eval_len": [23, 15, 21, 108, 19, 21, 53, 67, 26, 26]}

 88%|████████▊ | 879998/1000000 [12:31:40<1:17:09, 25.92it/s]global step 880000, trans_decision ep_re 32.53806518204053

{"global_step": 880000, "eval_re": [3.89350757698086, 3.159141154982814, 
15.528134819538444, 232.35584183206743, 8.464604107979262, 6.90783317328093, 
1.895258725937713, 4.87470866443118, 40.375283254466446, 7.926338510740202], 
"eval_len": [18, 18, 39, 133, 23, 32, 16, 17, 51, 18]}

 89%|████████▉ | 889998/1000000 [12:40:01<1:09:22, 26.43it/s]global step 890000, trans_decision ep_re 37.598674762159575

{"global_step": 890000, "eval_re": [0.6709774904720665, 222.4018379096142, 
7.404652462766032, 11.238039223549835, 77.13171353989306, 9.756497635688367, 
4.701439081442399, 5.978823604183343, 28.479124989026587, 8.223641684959905], 
"eval_len": [20, 117, 25, 40, 67, 22, 16, 23, 49, 26]}

 90%|████████▉ | 899999/1000000 [12:48:31<1:03:37, 26.19it/s]global step 900000, trans_decision ep_re 82.51547516140371

{"global_step": 900000, "eval_re": [31.46007968220814, 12.667229057627585, 
467.81121061218096, 8.181522685661871, 2.325158804960105, 13.096143721371975, 
104.56995364752173, 149.73593569762775, 12.384424264542494, 22.92309344033443], 
"eval_len": [48, 38, 166, 24, 17, 26, 77, 87, 26, 44]}

 91%|█████████ | 909999/1000000 [12:57:10<57:08, 26.25it/s]global step 910000, trans_decision ep_re 25.654658020342602

{"global_step": 910000, "eval_re": [7.407362769678865, 3.396741472972926, 
6.6703090010496995, 165.52418784448167, 8.443272415843463, 3.818334459214364, 
9.51637836019856, 7.764393491571582, 32.52418829061128, 11.481412097803606], 
"eval_len": [19, 19, 33, 135, 20, 15, 24, 16, 51, 26]}

 92%|█████████▏| 919999/1000000 [13:05:23<51:17, 25.99it/s]global step 920000, trans_decision ep_re 8.864699710420085

{"global_step": 920000, "eval_re": [3.8239374932734513, 2.875341489863236, 
8.666615714034382, 8.754334252988793, 10.850814278457356, 12.015666097628952, 
7.859133517373653, 15.327445641528184, 6.446718782685586, 12.02698983636726], 
"eval_len": [17, 26, 19, 19, 20, 32, 20, 25, 26, 23]}

 93%|█████████▎| 929999/1000000 [13:14:00<43:56, 26.55it/s]global step 930000, trans_decision ep_re 84.0739780759872

{"global_step": 930000, "eval_re": [7.051029259993185, 3.412027732577742, 
24.647073055880515, 88.64245040261355, 689.6736390016426, 3.790393418717099, 
6.017954699406064, 7.864230812635212, 1.279978266739362, 8.361004109666586], 
"eval_len": [19, 33, 43, 106, 270, 33, 21, 19, 22, 25]}

 94%|█████████▍| 939999/1000000 [13:22:20<37:41, 26.53it/s]global step 940000, trans_decision ep_re 33.48738216978286

{"global_step": 940000, "eval_re": [5.871162396605921, 3.3556955686355274, 
9.331097307066747, 30.04770719911358, 111.5772113543153, 9.148410048174119, 
57.56171722639607, 21.920844377085658, 46.401922181893745, 39.65805403854193], 
"eval_len": [22, 15, 20, 57, 98, 35, 65, 44, 54, 54]}

 95%|█████████▍| 949999/1000000 [13:30:34<31:23, 26.55it/s]global step 950000, trans_decision ep_re 185.00394041514568

{"global_step": 950000, "eval_re": [19.15854698970755, 252.13141977990549, 
371.7984383849, 17.39034149598051, 32.85357652753493, 711.6008750023935, 
19.35484095486033, 7.851664809552017, 13.244557076665261, 404.6551431299574], 
"eval_len": [48, 115, 143, 41, 52, 244, 47, 22, 38, 163]}

 96%|█████████▌| 959998/1000000 [13:38:55<24:52, 26.80it/s]global step 960000, trans_decision ep_re 176.40228590292102

{"global_step": 960000, "eval_re": [20.28106697891072, 4.205817563084125, 
14.995428939845317, 941.8979883047771, 471.4516834885842, 12.00328901734403, 
256.89839608648856, 3.9245414808448302, 17.07148654179209, 21.29316062753908], 
"eval_len": [45, 18, 37, 264, 190, 40, 138, 14, 25, 40]}

 97%|█████████▋| 969997/1000000 [13:47:30<18:53, 26.47it/s]global step 970000, trans_decision ep_re 13.515785608499433

{"global_step": 970000, "eval_re": [34.8483474505991, 36.69969261045786, 
13.044356100332063, 5.852258997998545, 9.833597933188141, 7.187349812812509, 
7.625275258928795, 3.3431520325398565, 4.909504100875285, 11.814321787262175], 
"eval_len": [51, 52, 48, 18, 23, 25, 17, 19, 24, 35]}

 98%|█████████▊| 979998/1000000 [13:55:41<12:29, 26.70it/s]global step 980000, trans_decision ep_re 51.5189922320575

{"global_step": 980000, "eval_re": [8.094722905740053, 2.6455098870632074, 
12.887101074733366, 37.202414103919885, 4.019961942092903, 11.728202956933808, 
4.620160787584962, 10.82921144049222, 409.84119230740146, 13.321444914613057], 
"eval_len": [21, 15, 23, 58, 16, 26, 17, 26, 157, 22]}

 99%|█████████▉| 989997/1000000 [14:04:03<06:22, 26.12it/s]global step 990000, trans_decision ep_re 10.120785884941528

{"global_step": 990000, "eval_re": [3.384001608245915, 7.714983951578396, 
10.460318385184534, 4.610442150394357, 19.291801893727563, 12.021977187460777, 
15.220951251912913, 8.381978298154403, 9.96559865144799, 10.15580547130843], 
"eval_len": [22, 27, 28, 27, 31, 28, 26, 22, 20, 23]}

100%|█████████▉| 999999/1000000 [14:12:24<00:00, 26.59it/s]global step 1000000, trans_decision ep_re 186.08844134598976

{"global_step": 1000000, "eval_re": [19.876789135738832, 401.3155614010744, 
16.321504443661713, 3.166160543951312, 9.605296211193545, 494.4182664424051, 
5.924684749834865, 5.40641172994587, 7.062674438223108, 897.7870643638689], 
"eval_len": [43, 165, 43, 16, 21, 196, 21, 23, 18, 302]}

100%|██████████| 1000000/1000000 [14:12:40<00:00, 19.55it/s]
