
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.25
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [05:40<13:08:35, 20.92it/s]global step 10000, trans_decision ep_re -85.76911625235252

{"global_step": 10000, "eval_re": [-5.899727966052546, -46.30608191946803, 
1.8496213442960876, -331.4338989639643, -44.0666016893845, 2.1017445845154104, 
-368.11433542015965, -9.726498282930113, -18.78545273731026, 
-37.30993147306746], "eval_len": [46, 52, 19, 1000, 80, 47, 1000, 81, 254, 211]}

  2%|▏         | 19999/1000000 [16:20<12:56:33, 21.03it/s]global step 20000, trans_decision ep_re -96.1684331002908

{"global_step": 20000, "eval_re": [17.94076915587287, -500.3709064838057, 
-24.529427341521476, 24.804065565431692, 22.93339620014173, -381.36092932430734,
-84.09214650686896, 2.0876338432337906, 8.011443739114132, -47.10822985019878], 
"eval_len": [67, 1000, 47, 44, 22, 1000, 165, 47, 69, 91]}

  3%|▎         | 29998/1000000 [27:10<12:49:11, 21.02it/s]global step 30000, trans_decision ep_re -57.37508838645525

{"global_step": 30000, "eval_re": [6.610374927869579, -57.739704646326814, 
-92.69922091515667, 19.308478496300676, -334.63189004220345, 
-21.686516183502487, -51.7748291194794, 9.844638382269284, -0.8086773958833463, 
-50.17353736843985], "eval_len": [22, 89, 205, 38, 1000, 60, 175, 57, 16, 161]}

  4%|▍         | 39999/1000000 [38:00<12:59:56, 20.51it/s]global step 40000, trans_decision ep_re -39.892379513211

{"global_step": 40000, "eval_re": [-197.66323124858764, -28.027593018793674, 
-127.01394107742014, 15.234068931824588, -20.526937027383834, 
2.3217652423145374, -19.22414874780054, -3.459944758613731, 6.210403962315438, 
-26.774237389965002], "eval_len": [1000, 70, 144, 43, 168, 31, 39, 21, 41, 88]}

  5%|▍         | 49999/1000000 [48:40<12:38:35, 20.87it/s]global step 50000, trans_decision ep_re -38.97379942169131

{"global_step": 50000, "eval_re": [13.715450115826131, 15.881575846674604, 
-116.5606090809251, -12.501089898640252, -219.0375628985521, -50.41637721751641,
11.679788446682451, 17.11374260923657, 6.05162079176681, -55.66453293146584], 
"eval_len": [83, 50, 1000, 79, 1000, 155, 32, 102, 22, 104]}

  6%|▌         | 59999/1000000 [59:30<12:37:39, 20.68it/s]global step 60000, trans_decision ep_re -28.937669256238394

{"global_step": 60000, "eval_re": [-12.356068308682737, -55.577868243082506, 
-79.24860396590265, -13.527180726038402, 23.09578998304126, -10.81603245464579, 
-6.9411494223259265, 20.172754938194863, -125.44377879032785, 
-28.73455557261423], "eval_len": [59, 149, 205, 42, 58, 104, 87, 107, 350, 232]}

  7%|▋         | 69998/1000000 [1:10:10<12:27:56, 20.72it/s]global step 70000, trans_decision ep_re -26.278396176302202

{"global_step": 70000, "eval_re": [-6.30638552868699, 12.053745853500635, 
27.687249633617586, 9.489966202241035, -2.908621393642241, -38.20068675951346, 
-77.60795077526784, -96.88201924778252, -5.069081410426953, -85.0401783370613], 
"eval_len": [39, 59, 43, 39, 37, 196, 179, 200, 19, 1000]}

  8%|▊         | 79999/1000000 [1:21:00<12:12:53, 20.92it/s]global step 80000, trans_decision ep_re -26.407855806545474

{"global_step": 80000, "eval_re": [-31.47246091018492, 20.51061634389981, 
-89.97053649477186, -104.03142395824099, 9.267405844222179, 23.195939279612528, 
-8.801982872329729, 7.000193873106567, -47.95526968525771, -41.821039485510546],
"eval_len": [222, 144, 1000, 1000, 34, 59, 1000, 1000, 138, 1000]}

  9%|▉         | 89999/1000000 [1:31:50<12:02:58, 20.98it/s]global step 90000, trans_decision ep_re -46.69620319146543

{"global_step": 90000, "eval_re": [-15.958253156135449, -37.17340807814216, 
12.466896118203307, -60.86924618265185, -22.442756979256355, -95.27502161803153,
-57.45971052797611, -149.53681887476455, -32.316549046793924, 
-8.397163569105649], "eval_len": [1000, 86, 75, 188, 40, 1000, 423, 1000, 104, 
450]}

 10%|▉         | 99999/1000000 [1:42:50<11:55:28, 20.97it/s]global step 100000, trans_decision ep_re 2.611726919254477

{"global_step": 100000, "eval_re": [-6.832042232509848, 29.644714398823623, 
3.0130432254460944, -34.22202350699962, -12.508250289575736, 40.149198589784774,
44.51854314898127, -62.948520586534805, 16.51604337278709, 8.78656307234192], 
"eval_len": [1000, 71, 35, 61, 19, 72, 588, 1000, 69, 68]}

 11%|█         | 109999/1000000 [1:53:50<11:57:08, 20.68it/s]global step 110000, trans_decision ep_re -27.173596730861043

{"global_step": 110000, "eval_re": [-15.650630488637356, -51.31540139164929, 
-0.6793157187304345, -40.69206707021205, -27.815498572036567, 
-26.732239585836695, -41.30336031658697, 30.967851752935374, 
-15.363026299444456, -83.15227961841198], "eval_len": [124, 306, 1000, 329, 145,
180, 97, 180, 132, 520]}

 12%|█▏        | 119999/1000000 [2:04:30<11:32:52, 21.17it/s]global step 120000, trans_decision ep_re -15.256975820022046

{"global_step": 120000, "eval_re": [5.282474158497796, 18.201877843901723, 
-71.92420136483365, 0.5542395618621038, 2.4993756078778393, -4.917503821896846, 
-29.313845739465552, -16.315507358910377, 6.271495991307911, 
-62.908163078561415], "eval_len": [47, 413, 338, 45, 34, 52, 142, 98, 38, 142]}

 13%|█▎        | 129998/1000000 [2:15:10<11:21:21, 21.28it/s]global step 130000, trans_decision ep_re -55.34205816739743

{"global_step": 130000, "eval_re": [2.832774395202329, -105.30979629570402, 
19.8324318184281, -152.72285364507414, -156.38925228853904, -5.445650615808957, 
-17.129230693543747, -128.00583615563664, 3.173570484314035, -14.2567386776122],
"eval_len": [12, 1000, 420, 1000, 1000, 16, 110, 1000, 66, 115]}

 14%|█▍        | 139999/1000000 [2:26:10<11:12:50, 21.30it/s]global step 140000, trans_decision ep_re -24.745372502788676

{"global_step": 140000, "eval_re": [6.145975635079848, -111.85641784712313, 
19.3914027753897, -98.61519024805419, -11.760854484141058, 26.131243098265394, 
26.10700218009751, -74.80098110336061, -29.767170409968777, 1.5712653759285322],
"eval_len": [61, 1000, 21, 1000, 115, 49, 172, 404, 350, 32]}

 15%|█▍        | 149999/1000000 [2:36:50<11:14:25, 21.01it/s]global step 150000, trans_decision ep_re -0.13155246963798128

{"global_step": 150000, "eval_re": [-5.92112434101692, -8.473985904315107, 
-10.791878047998745, 15.089600599696162, 9.234346788701043, 4.077033855039202, 
20.944685325619563, -33.65335377906989, 10.576348478643379, -2.3971976716785], 
"eval_len": [140, 138, 44, 23, 16, 15, 118, 59, 199, 33]}

 16%|█▌        | 159999/1000000 [2:47:30<10:57:47, 21.28it/s]global step 160000, trans_decision ep_re -19.585032454100222

{"global_step": 160000, "eval_re": [32.460368646713135, -25.06622071645115, 
-22.56722984277107, 7.661484531191008, -24.212791505238425, 19.58565437008719, 
-33.85170331160871, -27.111002948530587, -14.083752690834759, 
-108.66513107355885], "eval_len": [171, 268, 1000, 567, 128, 473, 111, 87, 129, 
1000]}

 17%|█▋        | 169997/1000000 [2:58:10<10:58:33, 21.01it/s]global step 170000, trans_decision ep_re -8.143906970105226

{"global_step": 170000, "eval_re": [10.782539213449146, -29.942389846297587, 
2.1041034029069463, -22.3862165893703, -24.672129535080487, -86.5206439739941, 
66.90581253129031, 13.979716176696847, 12.404913614032157, -24.094774694685196],
"eval_len": [20, 211, 58, 574, 208, 1000, 575, 66, 44, 182]}

 18%|█▊        | 179997/1000000 [3:08:50<10:44:33, 21.20it/s]global step 180000, trans_decision ep_re -30.689210247774774

{"global_step": 180000, "eval_re": [32.71609847052404, -36.78973461198823, 
-15.867335324773645, 23.88448131507661, -117.0214712046103, -42.617680436699544,
-73.79992547894392, 41.71595778686722, -137.75753342885076, 18.64504043565074], 
"eval_len": [178, 1000, 30, 278, 1000, 178, 1000, 219, 1000, 155]}

 19%|█▉        | 189999/1000000 [3:19:40<10:28:41, 21.47it/s]global step 190000, trans_decision ep_re -5.183607351515862

{"global_step": 190000, "eval_re": [-11.689023503441003, 22.9601286742499, 
-22.79128404279566, 41.01000792082328, -9.750726624263699, 38.2714741378243, 
-85.46171339186026, 18.73081445942735, -16.890942112609185, 
-26.224809032513637], "eval_len": [54, 47, 94, 227, 32, 76, 1000, 89, 51, 609]}

 20%|█▉        | 199998/1000000 [3:30:20<10:13:48, 21.72it/s]global step 200000, trans_decision ep_re -15.734699765770742

{"global_step": 200000, "eval_re": [-104.26195363170221, 72.19093495536984, 
30.864392521135148, -114.48188962586886, 20.059946863567692, 
-24.860804281914714, -4.239103198402728, 10.3959222656581, -43.39000218594065, 
0.37555866039097263], "eval_len": [1000, 199, 331, 1000, 72, 83, 166, 23, 364, 
19]}

 21%|██        | 209999/1000000 [3:41:00<10:09:38, 21.60it/s]global step 210000, trans_decision ep_re -17.964408655292093

{"global_step": 210000, "eval_re": [-6.793267998397523, 20.745118155718927, 
0.9628507100281518, 2.888255133501791, -104.58163992674167, -47.861762182040984,
-27.05389323574485, -17.2054425174784, 14.140046243308646, -14.884350935074986],
"eval_len": [103, 72, 112, 68, 1000, 157, 101, 92, 86, 77]}

 22%|██▏       | 219997/1000000 [3:51:30<10:26:27, 20.75it/s]global step 220000, trans_decision ep_re -0.7973096827296189

{"global_step": 220000, "eval_re": [15.996701174441995, -27.137923810447976, 
-2.90855931745267, -15.46159688502927, 90.83454589490223, 25.286423200637056, 
-9.475663358562063, -0.6531282508067295, 13.108510562818742, -97.5624060377975],
"eval_len": [55, 59, 34, 136, 119, 111, 36, 11, 105, 1000]}

 23%|██▎       | 229999/1000000 [4:02:10<10:08:54, 21.08it/s]global step 230000, trans_decision ep_re 5.2384880616674305

{"global_step": 230000, "eval_re": [29.422305226448245, 16.670173724190114, 
12.5286790842271, 13.985267243699273, 9.689124755187107, 3.2035431266758385, 
-86.92303478593206, 23.783348559113264, 13.121104761052141, 16.904368922013294],
"eval_len": [1000, 147, 65, 1000, 90, 39, 1000, 161, 19, 48]}

 24%|██▍       | 239999/1000000 [4:12:50<9:56:33, 21.23it/s]global step 240000, trans_decision ep_re 10.391002977392075

{"global_step": 240000, "eval_re": [-10.603283515071814, 8.643788910572148, 
4.056858738844736, 37.419126262398976, 23.30093502031393, 3.6659479987202195, 
-30.83506903350471, 25.217692970687914, 56.11687726454469, -13.07284484358534], 
"eval_len": [1000, 181, 34, 63, 358, 15, 205, 38, 430, 126]}

 25%|██▍       | 249997/1000000 [4:23:20<9:46:13, 21.32it/s]global step 250000, trans_decision ep_re -2.233885633019267

{"global_step": 250000, "eval_re": [38.12284796882571, -49.37934317081358, 
12.187285486614016, 86.18615734770923, 37.13939268183711, 19.020190395645827, 
-89.71593525391455, 56.83325098003608, -94.37642558112901, -38.356277185003506],
"eval_len": [57, 1000, 1000, 517, 196, 226, 1000, 133, 1000, 75]}

 26%|██▌       | 259998/1000000 [4:34:10<9:40:16, 21.25it/s]global step 260000, trans_decision ep_re -15.381097802759971

{"global_step": 260000, "eval_re": [-16.54882156584013, 58.18637789459683, 
-3.733045516211436, -25.317915860850313, 16.33583979416225, 36.27489822931851, 
-186.916952845978, -9.480499916568473, -5.2836831209427135, -17.32717511928625],
"eval_len": [20, 272, 662, 99, 75, 148, 1000, 39, 28, 160]}

 27%|██▋       | 269997/1000000 [4:44:40<9:34:35, 21.17it/s]global step 270000, trans_decision ep_re -2.0942605465458155

{"global_step": 270000, "eval_re": [-8.905018896006684, 23.39778938034054, 
-12.287233882879994, -6.924919008026742, 32.794579960174914, 7.23456191675117, 
-17.759394226194672, 40.497838481408685, -109.44930102773938, 
30.458491836714007], "eval_len": [56, 49, 109, 39, 59, 31, 20, 107, 315, 134]}

 28%|██▊       | 279999/1000000 [4:55:10<9:23:01, 21.31it/s]global step 280000, trans_decision ep_re -4.911112926605456

{"global_step": 280000, "eval_re": [16.27463527369026, 2.2717606032590396, 
6.79505432645722, -17.546678937486718, -31.93812676141316, 21.003326025550496, 
2.7128968677907404, -17.23417223664006, -67.89872889000827, 36.448904462745894],
"eval_len": [52, 37, 93, 1000, 38, 75, 12, 149, 1000, 766]}

 29%|██▉       | 289997/1000000 [5:05:40<9:15:12, 21.31it/s]global step 290000, trans_decision ep_re 0.978975085663043

{"global_step": 290000, "eval_re": [29.761873283437218, -92.45938547261524, 
13.308367639554353, 16.38295964569929, -5.00557717899272, -0.06386372367017201, 
29.923487924427214, 10.321967885845494, 2.8950336536750463, 4.7248871992699435],
"eval_len": [214, 1000, 53, 55, 132, 18, 315, 45, 65, 48]}

 30%|██▉       | 299997/1000000 [5:16:10<9:05:02, 21.41it/s]global step 300000, trans_decision ep_re 1.1254707444236245

{"global_step": 300000, "eval_re": [16.504399321156967, -58.18399258516163, 
2.036410431835881, 25.990969648394945, 1.3125618726630721, -1.7097427043844942, 
-22.613344921036195, 39.41062757192794, 12.248213164985685, 
-3.7413943561459257], "eval_len": [40, 1000, 39, 303, 78, 167, 697, 153, 25, 
87]}

 31%|███       | 309999/1000000 [5:26:40<9:02:20, 21.20it/s]global step 310000, trans_decision ep_re 5.928233063466487

{"global_step": 310000, "eval_re": [3.2362771910452888, 24.749631864911017, 
26.838352924716048, 28.69885407555963, 15.398548678461594, 7.18424259627434, 
-45.06602055134848, -22.544361251695577, 7.349077697940469, 13.437727408800521],
"eval_len": [14, 342, 360, 122, 42, 26, 237, 169, 54, 360]}

 32%|███▏      | 319997/1000000 [5:37:10<8:44:36, 21.60it/s]global step 320000, trans_decision ep_re -6.610592527051706

{"global_step": 320000, "eval_re": [7.886768545174063, -10.554258298022589, 
-65.730662901031, 30.68833645481348, 0.8985675300847051, -6.599832439024066, 
-7.897750346230582, -25.37241383510464, -22.34426547554656, 32.919585494370125],
"eval_len": [96, 49, 1000, 136, 30, 43, 24, 339, 254, 58]}

 33%|███▎      | 329998/1000000 [5:47:40<8:36:19, 21.63it/s]global step 330000, trans_decision ep_re -1.79901845712241

{"global_step": 330000, "eval_re": [44.55376631472565, -51.98487718188817, 
103.60929395873661, -8.847916859969963, -65.62896116794364, 11.57338411639477, 
41.877651255028766, -4.132036604219949, -48.866827583790496, 
-40.14366081829767], "eval_len": [134, 312, 1000, 29, 721, 225, 318, 312, 1000, 
336]}

 34%|███▍      | 339999/1000000 [5:58:20<8:32:10, 21.48it/s]global step 340000, trans_decision ep_re 9.753722062868826

{"global_step": 340000, "eval_re": [30.31997610859761, 8.132974895527049, 
-41.752172460310355, 149.27810605080566, 37.25033806732829, 3.6738042498712487, 
0.20943561096314967, 6.421876707815038, 20.908888608670605, 
-116.90600721058001], "eval_len": [52, 46, 1000, 592, 77, 219, 171, 35, 40, 
1000]}

 35%|███▍      | 349997/1000000 [6:08:50<8:21:52, 21.59it/s]global step 350000, trans_decision ep_re 19.739025335360097

{"global_step": 350000, "eval_re": [-2.771303853112861, -9.24165921000128, 
47.99851451187228, 25.04964077545428, 12.14636149111227, 9.073273868056924, 
64.35967045993608, 21.74337350298454, 29.96946179857645, -0.9370799912777069], 
"eval_len": [15, 102, 272, 114, 46, 154, 156, 326, 49, 38]}

 36%|███▌      | 359999/1000000 [6:19:20<8:19:01, 21.38it/s]global step 360000, trans_decision ep_re -14.558432102763366

{"global_step": 360000, "eval_re": [3.9533452637500837, 23.077983721121793, 
12.944130503535408, -70.05905887798303, -104.10000720141707, 
-61.583419084563765, 8.028081228558626, -18.92571635137994, 50.25789488410896, 
10.822444886635278], "eval_len": [61, 57, 180, 167, 1000, 1000, 44, 43, 231, 
39]}

 37%|███▋      | 369999/1000000 [6:29:50<8:15:28, 21.19it/s]global step 370000, trans_decision ep_re 11.276528224651816

{"global_step": 370000, "eval_re": [36.29748730518821, -4.510596416019087, 
1.889629250102458, 1.06266168010409, 24.07994835165588, 60.29391718282584, 
5.8633403843366185, -56.34903626463884, 42.10741520238427, 2.030515570578736], 
"eval_len": [91, 63, 101, 157, 37, 357, 75, 312, 348, 39]}

 38%|███▊      | 379997/1000000 [6:40:20<7:56:12, 21.70it/s]global step 380000, trans_decision ep_re -27.615951665081003

{"global_step": 380000, "eval_re": [20.45520450881706, -211.029282311649, 
8.764217172843159, 13.44425145349189, -22.288976197887973, -64.40330501727166, 
-2.13358197555962, -95.2545885358077, 15.594444652498181, 60.69209959971566], 
"eval_len": [1000, 1000, 48, 23, 1000, 1000, 290, 1000, 274, 625]}

 39%|███▉      | 389999/1000000 [6:51:10<7:51:01, 21.58it/s]global step 390000, trans_decision ep_re -2.8567750335893214

{"global_step": 390000, "eval_re": [-55.05625837986035, -1.41764534941022, 
-24.14028790225648, -26.807511794306638, 13.720597146915019, 7.09289595382487, 
20.950115775981512, 5.625878679409293, 10.27423859825225, 21.19022693555754], 
"eval_len": [350, 23, 1000, 113, 159, 90, 91, 473, 397, 78]}

 40%|███▉      | 399998/1000000 [7:01:40<7:36:49, 21.89it/s]global step 400000, trans_decision ep_re -28.948867786752526

{"global_step": 400000, "eval_re": [-44.854080625696895, 28.29581912549955, 
-19.347901540124646, 5.7437199057766115, 11.845915057405817, 
-1.9366715480477714, -124.63789294712038, -156.25876227980507, 
1.1758878979800473, 10.485289086607457], "eval_len": [525, 134, 38, 39, 47, 65, 
1000, 1000, 182, 498]}

 41%|████      | 409999/1000000 [7:12:10<7:43:36, 21.21it/s]global step 410000, trans_decision ep_re -0.07612292806025352

{"global_step": 410000, "eval_re": [-26.656254769035915, -3.7199256071035114, 
-12.176145979178608, -34.632317700895356, 49.69035978938478, -9.49056491622739, 
-4.902654270015267, 57.43426480835581, -30.335728696091696, 14.027738060204616],
"eval_len": [1000, 25, 436, 1000, 1000, 41, 140, 84, 68, 214]}

 42%|████▏     | 419997/1000000 [7:22:50<7:25:19, 21.71it/s]global step 420000, trans_decision ep_re -2.3428939784072336

{"global_step": 420000, "eval_re": [15.72877331421981, -78.8526887402113, 
18.54515777055644, -5.363886457842369, 25.83592015202653, 27.423329192721702, 
-12.141115809529348, -32.074410459077896, 21.381632994682523, 
-3.9116517416184253], "eval_len": [215, 1000, 40, 74, 48, 337, 42, 1000, 110, 
41]}

 43%|████▎     | 429999/1000000 [7:33:20<7:21:31, 21.52it/s]global step 430000, trans_decision ep_re -37.439564451113775

{"global_step": 430000, "eval_re": [-102.56077197531872, -2.202923856577894, 
4.677643699369224, 16.95005275757301, -2.831830970459411, 26.02228337124639, 
-54.79420328662066, -170.68117308341414, -149.5699480368864, 
60.595226869950885], "eval_len": [1000, 42, 38, 52, 71, 371, 1000, 416, 1000, 
234]}

 44%|████▍     | 439999/1000000 [7:44:00<7:11:00, 21.65it/s]global step 440000, trans_decision ep_re -5.0056685461851105

{"global_step": 440000, "eval_re": [-5.610770441715365, -28.969709743249158, 
-23.124950704566785, 27.285198856870473, -9.768970851683571, 
-0.7509663396293214, -90.19006222167424, 117.59254890596625, -27.72076361872698,
-8.798239303442417], "eval_len": [22, 145, 123, 1000, 105, 12, 1000, 1000, 88, 
57]}

 45%|████▍     | 449998/1000000 [7:54:40<7:05:16, 21.55it/s]global step 450000, trans_decision ep_re -15.574575442817387

{"global_step": 450000, "eval_re": [-37.32020688121584, 10.587490252745267, 
1.0154628529652885, -30.84932280471758, -19.331468235609492, -47.1723279052105, 
16.873472418495034, 1.5267125766811551, -58.32400686845644, 7.248440166149209], 
"eval_len": [1000, 32, 16, 149, 54, 168, 35, 24, 267, 43]}

 46%|████▌     | 459999/1000000 [8:05:10<7:05:17, 21.16it/s]global step 460000, trans_decision ep_re -21.94451912555956

{"global_step": 460000, "eval_re": [4.574123019410656, 42.43492412798992, 
-74.23191476052057, 5.237469961173242, -50.73776118021223, 5.713573892105843, 
-16.221035142063492, -50.76718764738657, -94.04107863143733, 8.59369510534494], 
"eval_len": [16, 132, 1000, 108, 1000, 15, 1000, 1000, 1000, 45]}

 47%|████▋     | 469998/1000000 [8:15:50<6:50:24, 21.52it/s]global step 470000, trans_decision ep_re -31.303785234618594

{"global_step": 470000, "eval_re": [6.876402837541524, 2.4693276170749567, 
-13.17930645371361, -46.01388852946508, -6.193303358981419, -2.368034575737623, 
-35.70460281150462, -5.315797414996421, -111.68759517941245, 
-101.92105447699123], "eval_len": [20, 100, 28, 97, 1000, 39, 182, 49, 563, 
183]}

 48%|████▊     | 479998/1000000 [8:26:20<6:39:30, 21.69it/s]global step 480000, trans_decision ep_re -46.174302927442305

{"global_step": 480000, "eval_re": [30.82765751163541, -52.982069568358575, 
15.375797019438002, 10.62816494595701, -31.922258618881667, -120.78471031177163,
14.246142314913977, -5.888369716062375, -308.5873285749048, 
-12.656054276388414], "eval_len": [235, 1000, 98, 33, 30, 1000, 92, 19, 822, 
244]}

 49%|████▉     | 489999/1000000 [8:37:00<6:33:46, 21.59it/s]global step 490000, trans_decision ep_re 5.266648238285777

{"global_step": 490000, "eval_re": [-19.899784183936145, -0.3220824570137192, 
-11.375350161472644, 1.7072678744609129, 39.287431419476576, 14.858161514802331,
-11.93575894998876, 3.506323059026419, 26.889890202449564, 9.950384065053237], 
"eval_len": [50, 55, 38, 62, 291, 1000, 40, 42, 232, 59]}

 50%|████▉     | 499999/1000000 [8:47:20<6:30:06, 21.36it/s]global step 500000, trans_decision ep_re -14.474653874312278

{"global_step": 500000, "eval_re": [24.49755345029083, 6.215986542224569, 
-13.409468028586605, -30.206545774919366, -23.317008754737323, 
-16.364470194718155, 21.224848782664026, -28.163795957734134, 
-81.44699214372723, -3.7766466638793967], "eval_len": [67, 1000, 21, 1000, 179, 
83, 28, 54, 1000, 366]}

 51%|█████     | 509998/1000000 [8:58:00<6:11:31, 21.98it/s]global step 510000, trans_decision ep_re 21.97355424052809

{"global_step": 510000, "eval_re": [21.307362317424698, -11.240713096505266, 
5.449836536803215, 15.477660030871801, 17.5508828524806, -5.272084963745623, 
-5.088882870631539, 19.751585218289385, 141.72824265018556, 20.07165373010811], 
"eval_len": [57, 39, 102, 36, 97, 446, 114, 17, 1000, 121]}

 52%|█████▏    | 519998/1000000 [9:08:30<6:05:44, 21.87it/s]global step 520000, trans_decision ep_re 11.23283459052389

{"global_step": 520000, "eval_re": [16.362584193179895, 31.64767608976056, 
38.74264179035207, 33.30243308392812, -30.505880432428718, 14.895699786571548, 
7.053762493370469, 10.012727763286872, -11.056717187085471, 1.873418324303575], 
"eval_len": [40, 75, 347, 1000, 98, 21, 101, 66, 56, 44]}

 53%|█████▎    | 529999/1000000 [9:19:00<6:05:50, 21.41it/s]global step 530000, trans_decision ep_re 3.4224540034469415

{"global_step": 530000, "eval_re": [26.226907674720454, -62.79459347696566, 
27.031228885212812, 14.553861807401944, 49.34575854600765, 20.925922615440918, 
13.816966739995367, 4.143655705229138, -62.409669953016376, 3.384501490443167], 
"eval_len": [86, 793, 39, 177, 179, 98, 1000, 1000, 315, 104]}

 54%|█████▍    | 539999/1000000 [9:29:30<6:00:21, 21.28it/s]global step 540000, trans_decision ep_re -15.746811365286362

{"global_step": 540000, "eval_re": [11.354380709016434, 23.021740070768665, 
-38.06962208686254, -31.822651796853698, 66.36381639003442, 5.714602078170511, 
-40.544595653123274, -31.449782880095356, -24.790109886615458, 
-97.24589059730336], "eval_len": [76, 247, 1000, 49, 200, 50, 361, 42, 63, 
1000]}

 55%|█████▍    | 549999/1000000 [9:40:10<5:51:07, 21.36it/s]global step 550000, trans_decision ep_re 26.35749748079142

{"global_step": 550000, "eval_re": [50.304534861272835, -15.761177886164003, 
-5.066544932495193, 44.7326649999356, -12.916014788120727, 62.624849155062, 
99.24689474976861, -36.42494499862226, 26.228838250914603, 50.605875396362684], 
"eval_len": [1000, 1000, 379, 108, 1000, 185, 522, 40, 132, 355]}

 56%|█████▌    | 559999/1000000 [9:50:40<5:37:13, 21.75it/s]global step 560000, trans_decision ep_re -4.7385859255461265

{"global_step": 560000, "eval_re": [-6.058037736271803, -7.605986254292038, 
77.84213227047341, -39.777934900245924, -26.489190927549604, -49.34792810947879,
8.328694869401547, -10.06905418296992, 11.488890051962878, -5.697444336491023], 
"eval_len": [1000, 101, 428, 129, 117, 400, 48, 46, 217, 11]}

 57%|█████▋    | 569999/1000000 [10:01:10<5:30:23, 21.69it/s]global step 570000, trans_decision ep_re 5.400293677546854

{"global_step": 570000, "eval_re": [42.576664394476076, 28.054980035361705, 
13.826636228610202, -6.0399572380133115, 15.01432233056186, -2.4964923818799942,
9.679181503701859, 13.821006209577742, -59.54583723835064, -0.8875670685769392],
"eval_len": [256, 667, 371, 118, 193, 431, 49, 174, 1000, 138]}

 58%|█████▊    | 579998/1000000 [10:11:50<5:21:40, 21.76it/s]global step 580000, trans_decision ep_re -16.70466887666428

{"global_step": 580000, "eval_re": [-2.1737576690188463, -55.328962434451135, 
-99.05603570623651, -17.474285690441814, -81.95424146919005, 24.70372129875503, 
-58.66329186879101, 34.53820984358619, 24.11458960378526, 64.24736532536018], 
"eval_len": [40, 1000, 1000, 149, 1000, 23, 1000, 198, 89, 88]}

 59%|█████▉    | 589999/1000000 [10:22:30<5:17:53, 21.50it/s]global step 590000, trans_decision ep_re -27.119338001088824

{"global_step": 590000, "eval_re": [-3.7998435896512417, -5.288383560375195, 
-13.722658990623394, -0.6516527256227815, 16.968098746148453, 
11.207178085339146, -38.53446760718235, -101.66480838879778, -68.6699627294486, 
-67.03687925067446], "eval_len": [56, 61, 106, 24, 108, 120, 152, 1000, 1000, 
1000]}

 60%|█████▉    | 599999/1000000 [10:33:00<5:13:00, 21.30it/s]global step 600000, trans_decision ep_re -5.9319506087558675

{"global_step": 600000, "eval_re": [-34.22392585952036, -42.70120786035712, 
11.052540714084357, 12.93233778701997, 15.714217702849727, 12.668693987846202, 
-17.203995364669353, 4.621762087725542, 6.185209282852292, -28.365138565389927],
"eval_len": [169, 220, 66, 343, 124, 230, 153, 38, 42, 613]}

 61%|██████    | 609999/1000000 [10:43:30<5:02:34, 21.48it/s]global step 610000, trans_decision ep_re -6.442620573856457

{"global_step": 610000, "eval_re": [-22.511036825470196, -1.1729700722883583, 
2.15759368705583, -9.10311188335671, 8.333082932498147, -71.31913803671902, 
15.76475465150256, 13.259133051501365, 2.2581641852762457, -2.092677428564434], 
"eval_len": [1000, 41, 54, 36, 93, 1000, 36, 21, 1000, 105]}

 62%|██████▏   | 619999/1000000 [10:54:10<4:52:43, 21.64it/s]global step 620000, trans_decision ep_re -2.0608509050986648

{"global_step": 620000, "eval_re": [-15.826289118203805, -38.197792340185806, 
4.469275571535247, -4.1430287720702275, -65.48131026641605, 7.69380022833769, 
-7.652331168022232, 18.924955809949683, 26.024171205161228, 53.58003979892762], 
"eval_len": [97, 111, 40, 1000, 357, 35, 1000, 21, 100, 143]}

 63%|██████▎   | 629998/1000000 [11:04:40<4:45:12, 21.62it/s]global step 630000, trans_decision ep_re -22.503063111293702

{"global_step": 630000, "eval_re": [-16.316783932562373, 4.275329302809868, 
-72.47333011028317, -8.119681436382804, -43.24070484007196, -43.422116672662035,
-40.49585061420072, -39.134933864671304, 51.469809546147665, 
-17.572368491060217], "eval_len": [71, 16, 223, 198, 171, 161, 1000, 155, 141, 
38]}

 64%|██████▍   | 639998/1000000 [11:15:10<4:35:39, 21.77it/s]global step 640000, trans_decision ep_re -2.4856667205002028

{"global_step": 640000, "eval_re": [9.419487749444102, 28.19908644073519, 
-42.587411585694504, 3.0881065041758866, 10.466831932642032, 10.839405648577163,
-7.05208349870455, 2.4959516533921793, -35.51611558474393, -4.209926464825596], 
"eval_len": [54, 96, 1000, 445, 399, 25, 1000, 15, 1000, 38]}

 65%|██████▍   | 649999/1000000 [11:25:50<4:30:03, 21.60it/s]global step 650000, trans_decision ep_re -16.0248076125003

{"global_step": 650000, "eval_re": [-30.853011878513005, 8.25990647952342, 
-15.432374360954178, -83.42849441854031, -73.24041817819622, 13.400799890149585,
19.14432302041282, -21.682603699377854, 10.781709661887795, 12.802087358604977],
"eval_len": [258, 21, 36, 1000, 220, 60, 1000, 105, 1000, 498]}

 66%|██████▌   | 659998/1000000 [11:36:30<4:18:02, 21.96it/s]global step 660000, trans_decision ep_re -2.3018799760878466

{"global_step": 660000, "eval_re": [-84.7640623475441, -1.9725708076384372, 
-0.15921647835665276, -8.076813501936432, 26.808988837113354, 7.258214685304861,
72.77733011228555, -8.8723263905511, -20.229046209880423, -5.789297659675086], 
"eval_len": [88, 116, 13, 188, 73, 39, 218, 61, 323, 437]}

 67%|██████▋   | 669999/1000000 [11:46:50<4:20:27, 21.12it/s]global step 670000, trans_decision ep_re 0.5256845122586019

{"global_step": 670000, "eval_re": [36.44682553902007, -29.44383190446747, 
13.338989870901386, 7.799316245638021, 28.478817617506465, 7.100810335587469, 
-24.923576280063006, 25.023891242544398, -51.18984558877579, 
-7.374551955305523], "eval_len": [89, 459, 98, 51, 36, 181, 220, 41, 1000, 
1000]}

 68%|██████▊   | 679998/1000000 [11:57:30<4:05:02, 21.77it/s]global step 680000, trans_decision ep_re -15.320669789345146

{"global_step": 680000, "eval_re": [-39.548936700045644, -15.129584456629049, 
22.953460617739847, -69.5462732923019, -3.5351949608699, -92.13889286501308, 
4.904210153379882, 83.64296851057443, -12.410906626696232, -32.39754827358982], 
"eval_len": [232, 33, 24, 1000, 15, 301, 47, 382, 852, 224]}

 69%|██████▉   | 689999/1000000 [12:08:00<4:00:40, 21.47it/s]global step 690000, trans_decision ep_re -10.145266422332586

{"global_step": 690000, "eval_re": [0.6495223483888845, 19.241914627607844, 
-17.565365508423696, 2.251694371708635, -38.49043006027829, -25.16691073169102, 
-4.14938968755418, -26.413253596666333, -7.585495035957163, -4.22495095046052], 
"eval_len": [42, 74, 69, 100, 1000, 163, 39, 65, 28, 71]}

 70%|██████▉   | 699997/1000000 [12:18:30<3:50:11, 21.72it/s]global step 700000, trans_decision ep_re -13.98000337557404

{"global_step": 700000, "eval_re": [60.10792811786099, 13.694514707290951, 
-53.92737846040774, -86.89748275152732, -79.69427613919245, 67.47539218826483, 
-15.400836740240226, -18.118671023206055, -2.8173560838364726, 
-24.221867570746902], "eval_len": [1000, 29, 245, 720, 325, 267, 38, 49, 1000, 
84]}

 71%|███████   | 709998/1000000 [12:29:10<3:43:16, 21.65it/s]global step 710000, trans_decision ep_re -0.15743820412714732

{"global_step": 710000, "eval_re": [-23.259159456606184, 0.057017564022636025, 
54.14994596018567, -0.9912691767493285, -21.192727268687904, 42.25857976198958, 
3.253794119414265, 24.53039242997451, -105.45517930306902, 25.074223328254295], 
"eval_len": [56, 20, 295, 105, 150, 182, 20, 106, 1000, 130]}

 72%|███████▏  | 719998/1000000 [12:39:40<3:34:31, 21.75it/s]global step 720000, trans_decision ep_re -2.44490217622553

{"global_step": 720000, "eval_re": [-53.96915491633112, 3.422315608403598, 
15.861794314716489, -3.3901033436534878, 28.93098726193572, -35.07646467304695, 
74.84227711134815, 6.690552742394447, -54.39011817918239, -7.371107688839767], 
"eval_len": [709, 19, 102, 38, 46, 500, 174, 138, 1000, 25]}

 73%|███████▎  | 729997/1000000 [12:50:10<3:26:20, 21.81it/s]global step 730000, trans_decision ep_re -1.4136901052075284

{"global_step": 730000, "eval_re": [17.420575918715546, 0.34869360964979224, 
-10.308153257895087, 2.9203245932027495, -54.928392028118694, 
28.884544685282172, 4.847252151926328, 5.018179905021889, -4.806402763338769, 
-3.533523866521211], "eval_len": [33, 18, 83, 17, 1000, 40, 50, 216, 383, 27]}

 74%|███████▍  | 739997/1000000 [13:00:40<3:21:29, 21.51it/s]global step 740000, trans_decision ep_re 5.396102093092051

{"global_step": 740000, "eval_re": [-19.092597795100108, 10.337170674714889, 
-7.53941818458666, 48.678693929686816, 8.891122590297943, 4.3738690005821095, 
18.881540478861407, -1.8781984635345728, -24.21168488836301, 
15.520523588361701], "eval_len": [134, 38, 65, 1000, 82, 123, 39, 199, 255, 
178]}

 75%|███████▍  | 749999/1000000 [13:11:10<3:14:03, 21.47it/s]global step 750000, trans_decision ep_re -4.532109842601943

{"global_step": 750000, "eval_re": [-35.87597757058445, -7.7025622505362925, 
-58.51837822121697, 1.101266011409743, 49.57105960047556, -1.6062260002135615, 
1.3214664321214564, -20.799369391233697, 21.36278594751976, 5.824837016239021], 
"eval_len": [1000, 47, 1000, 28, 145, 67, 274, 47, 59, 20]}

 76%|███████▌  | 759999/1000000 [13:21:40<3:06:01, 21.50it/s]global step 760000, trans_decision ep_re -2.7633773618938475

{"global_step": 760000, "eval_re": [18.284909237840974, -8.141831861738297, 
13.684394470886959, -20.159526506571375, -10.636554109637105, 
24.530424723431253, 40.19311351814528, 66.80301177273994, 74.76740629485975, 
-226.95912115889584], "eval_len": [48, 30, 23, 38, 50, 119, 158, 433, 1000, 
982]}

 77%|███████▋  | 769998/1000000 [13:32:20<2:54:45, 21.94it/s]global step 770000, trans_decision ep_re -1.1923717828299398

{"global_step": 770000, "eval_re": [1.1461414688686364, -16.185164714699873, 
-6.54146230687291, 41.29866818648808, -46.7712379511116, 11.106311598277886, 
-14.413101407838212, 23.654107164459084, -6.070204957954598, 
0.8522250920841121], "eval_len": [165, 67, 184, 1000, 365, 55, 276, 1000, 104, 
150]}

 78%|███████▊  | 779999/1000000 [13:42:50<2:53:07, 21.18it/s]global step 780000, trans_decision ep_re 7.775812684504308

{"global_step": 780000, "eval_re": [40.26717317669484, 7.164774277837989, 
2.6106297533725633, 21.67237822223902, -35.31764672731781, 8.188413643645156, 
32.56059416244007, -37.980383646806956, 13.642524240936458, 24.949669742001742],
"eval_len": [480, 41, 20, 79, 1000, 47, 32, 113, 196, 35]}

 79%|███████▉  | 789997/1000000 [13:53:20<2:45:16, 21.18it/s]global step 790000, trans_decision ep_re -8.096223813129479

{"global_step": 790000, "eval_re": [-100.77623877899575, -7.064632787966455, 
-18.738037943529264, 62.20050781409079, -49.945113683709565, 14.129879150232906,
3.174515562480409, 3.172550309228864, -16.715488959199995, 29.59982118607329], 
"eval_len": [459, 114, 142, 231, 140, 114, 49, 79, 224, 62]}

 80%|███████▉  | 799997/1000000 [14:03:50<2:34:18, 21.60it/s]global step 800000, trans_decision ep_re -2.318955730239991

{"global_step": 800000, "eval_re": [9.638572745225765, -31.431372982065444, 
-41.062713657547036, -2.0062877203210867, -21.201906018096604, 
-15.28365572586026, -1.3831231329281448, 50.19248542010154, 13.5667993144922, 
15.78164445459916], "eval_len": [71, 253, 1000, 16, 37, 140, 38, 666, 526, 
1000]}

 81%|████████  | 809998/1000000 [14:14:30<2:27:17, 21.50it/s]global step 810000, trans_decision ep_re -16.81814136441388

{"global_step": 810000, "eval_re": [-3.4656289105662568, -13.431822365986132, 
-30.929397136378284, -26.676958117468455, 30.043365145760568, 
-6.328523249670939, -7.094397263384529, -137.82293971074802, 14.725099055935415,
12.799788908367882], "eval_len": [14, 52, 1000, 1000, 93, 1000, 324, 1000, 597, 
21]}

 82%|████████▏ | 819998/1000000 [14:25:10<2:17:33, 21.81it/s]global step 820000, trans_decision ep_re -31.17002790932341

{"global_step": 820000, "eval_re": [89.0464463241469, -11.055914412570132, 
-0.10017969616205424, 26.054927251264978, -10.866107559854903, 
-62.633881951837196, -87.74806853399116, -134.50062953376687, 
-29.368419263200956, -90.52845171726271], "eval_len": [264, 23, 55, 25, 168, 
209, 1000, 1000, 321, 506]}

 83%|████████▎ | 829999/1000000 [14:35:40<2:12:12, 21.43it/s]global step 830000, trans_decision ep_re -8.395767911465594

{"global_step": 830000, "eval_re": [-10.489299269386125, 59.87473251007314, 
-53.04938090053721, -0.8877879055295341, -7.21143025829876, -53.899771247415714,
-1.6542663815757506, -49.60803696353026, 42.59642635613978, -9.628865054595488],
"eval_len": [57, 318, 176, 158, 1000, 128, 327, 1000, 367, 23]}

 84%|████████▍ | 839999/1000000 [14:46:20<2:06:03, 21.16it/s]global step 840000, trans_decision ep_re -44.93837456454779

{"global_step": 840000, "eval_re": [-2.3383162267293485, 21.71122626799547, 
-7.936224894660835, 17.77661889318112, -24.618777770126123, -535.2486696368316, 
6.9978353506767785, 59.459979090497264, 38.89106247520909, -24.07847919468974], 
"eval_len": [112, 1000, 1000, 34, 1000, 1000, 66, 212, 200, 86]}

 85%|████████▍ | 849999/1000000 [14:57:01<1:55:47, 21.59it/s]global step 850000, trans_decision ep_re 4.729961621973354

{"global_step": 850000, "eval_re": [58.13060715759978, 1.0232593207676954, 
4.0065838033187156, 20.89772265180592, -2.7087935622560178, -52.63879330953377, 
-98.6633865180677, 96.995172445513, 6.448362462774879, 13.808881767811014], 
"eval_len": [171, 23, 26, 248, 337, 416, 625, 152, 464, 37]}

 86%|████████▌ | 859999/1000000 [15:07:31<1:48:26, 21.52it/s]global step 860000, trans_decision ep_re -21.747289615839456

{"global_step": 860000, "eval_re": [5.393990499597189, 42.84515080337068, 
-123.32708498913918, -9.733056243245603, -5.062827817134855, 11.496749214487592,
-118.11253564468775, 11.156761164275364, -16.94828536104612, 
-15.18175778487192], "eval_len": [290, 1000, 1000, 931, 97, 38, 1000, 36, 287, 
36]}

 87%|████████▋ | 869999/1000000 [15:18:01<1:40:32, 21.55it/s]global step 870000, trans_decision ep_re -17.59643749012168

{"global_step": 870000, "eval_re": [-7.406203366552613, -23.20006207678204, 
-10.454305056046785, -60.45741824562074, -88.15891529447337, -3.548752311420441,
9.734135925829467, -8.812688025837012, 12.513778392114839, 3.826055157571928], 
"eval_len": [1000, 223, 226, 1000, 1000, 172, 48, 132, 56, 151]}

 88%|████████▊ | 879999/1000000 [15:28:41<1:33:41, 21.35it/s]global step 880000, trans_decision ep_re -17.663757589909963

{"global_step": 880000, "eval_re": [-7.48624327704473, 3.0269048520452344, 
-22.005229135965436, -23.86645012062293, -0.1006745457031053, 
-111.51604365854284, -2.5461997758342, 11.146624499546375, 15.925583065541048, 
-39.21584780251904], "eval_len": [1000, 132, 1000, 49, 17, 449, 126, 23, 223, 
1000]}

 89%|████████▉ | 889999/1000000 [15:39:21<1:25:02, 21.56it/s]global step 890000, trans_decision ep_re -3.5862480106266355

{"global_step": 890000, "eval_re": [8.165195807882458, -3.7568165905339326, 
-18.134204749953867, -25.271526649819194, 6.974233881749514, 6.377828782833114, 
-12.503276141779159, -17.023585938215177, 11.84771344770825, 7.461958043861642],
"eval_len": [175, 39, 38, 204, 19, 15, 17, 31, 38, 153]}

 90%|████████▉ | 899999/1000000 [15:49:51<1:17:17, 21.57it/s]global step 900000, trans_decision ep_re -2.085379041375236

{"global_step": 900000, "eval_re": [-63.57815741466185, -9.038960734291525, 
7.218389516222983, -17.749067848495773, 11.726296693475504, 2.852042315384138, 
20.726459328754565, 33.66821622728853, 11.799837692842228, -18.478846190271167],
"eval_len": [118, 123, 171, 153, 1000, 177, 80, 37, 53, 55]}

 91%|█████████ | 909999/1000000 [16:00:21<1:09:32, 21.57it/s]global step 910000, trans_decision ep_re -26.84395089447613

{"global_step": 910000, "eval_re": [-14.634191089651328, -17.432173610697454, 
-104.60436540985326, -133.55591739797183, -0.9398976975627314, 
-5.745728969053049, -33.53592414529368, -24.1828803977058, 16.6867903343729, 
49.50477943865493], "eval_len": [30, 305, 281, 511, 30, 1000, 296, 84, 39, 246]}

 92%|█████████▏| 919998/1000000 [16:10:51<1:01:15, 21.77it/s]global step 920000, trans_decision ep_re -3.8363194353555343

{"global_step": 920000, "eval_re": [13.058110515048606, -91.30091606116224, 
41.333878483057916, -47.74180695331044, 42.20139127450594, -8.092803490121192, 
-0.4055578418367283, 4.982194309538464, -4.208225144940815, 11.810540555665144],
"eval_len": [20, 1000, 151, 261, 1000, 211, 1000, 1000, 42, 42]}

 93%|█████████▎| 929997/1000000 [16:21:31<54:17, 21.49it/s]global step 930000, trans_decision ep_re 3.929862654695339

{"global_step": 930000, "eval_re": [79.71937179672199, 53.19329870838714, 
20.518524314631374, -14.166246084288538, -26.02259991386175, 5.224935022767827, 
35.98491204876537, 15.677946097717468, 28.224397245071955, -159.05591268895947],
"eval_len": [376, 180, 1000, 464, 1000, 1000, 74, 44, 59, 560]}

 94%|█████████▍| 939998/1000000 [16:32:11<46:30, 21.51it/s]global step 940000, trans_decision ep_re 3.160553580201861

{"global_step": 940000, "eval_re": [-14.13523544575563, 11.790257208891363, 
13.030283585594287, 22.678000822557046, -22.213619538613685, -17.03700173503821,
-2.39062209932118, 20.310060073490263, 18.000209966741274, 1.5732029634730869], 
"eval_len": [38, 217, 19, 46, 98, 101, 95, 39, 68, 51]}

 95%|█████████▍| 949999/1000000 [16:42:41<38:54, 21.42it/s]global step 950000, trans_decision ep_re -13.315250168404258

{"global_step": 950000, "eval_re": [8.755678211159726, 12.63928497736964, 
-6.522251738490922, -28.123418631855515, 5.553187351143071, 32.34584865864703, 
14.180507600966326, -15.031527317327798, -47.945888360277245, 
-109.00392243537691], "eval_len": [108, 82, 19, 26, 107, 66, 82, 1000, 227, 
903]}

 96%|█████████▌| 959998/1000000 [16:53:11<30:44, 21.68it/s]global step 960000, trans_decision ep_re -6.0548552929502275

{"global_step": 960000, "eval_re": [-88.19873301102527, 11.997892493475026, 
13.203990083814574, 0.46967807647513116, -9.296880776357407, 53.29651284054442, 
27.02757524516027, -47.16970673201061, -35.1164223577907, 13.237541208212274], 
"eval_len": [611, 221, 58, 322, 65, 141, 100, 1000, 63, 136]}

 97%|█████████▋| 969998/1000000 [17:03:41<22:51, 21.88it/s]global step 970000, trans_decision ep_re -5.842490237956638

{"global_step": 970000, "eval_re": [24.80329782804931, -52.30563742926401, 
-48.799386578477176, 36.4734449386445, 21.644137756526987, -2.8038959111118302, 
-0.802647321324475, 23.883573243682505, -76.13756551082966, 15.61977660453747], 
"eval_len": [44, 1000, 1000, 46, 40, 39, 18, 40, 1000, 263]}

 98%|█████████▊| 979999/1000000 [17:14:11<15:48, 21.09it/s]global step 980000, trans_decision ep_re -16.190840969998234

{"global_step": 980000, "eval_re": [-30.410555978478108, -0.8197942745169187, 
-16.354211793822692, 19.90789280576798, 19.56434257618374, -65.34805343642194, 
-140.09163067403932, 6.925960973413602, 38.7100280085061, 6.007612093425217], 
"eval_len": [1000, 70, 38, 1000, 99, 1000, 548, 20, 92, 178]}

 99%|█████████▉| 989998/1000000 [17:24:51<07:39, 21.76it/s]global step 990000, trans_decision ep_re -20.473799078043747

{"global_step": 990000, "eval_re": [12.474213170819791, -41.453216154085666, 
-6.662043715436026, -1.0919217310729858, -78.41922213517783, -1.52208568414776, 
-85.09074398147733, 4.469360278984556, -20.918271086262237, 13.47594025741798], 
"eval_len": [62, 275, 439, 15, 1000, 34, 1000, 256, 38, 254]}

100%|█████████▉| 999999/1000000 [17:35:21<00:00, 21.27it/s]global step 1000000, trans_decision ep_re -17.647934490749968

{"global_step": 1000000, "eval_re": [-34.75045650784119, 66.75191041730226, 
24.83167057170391, -31.858412405708943, 26.741230939944597, -95.72033710908458, 
-20.96529561873923, -42.35812688430163, -74.77273372316752, 5.621205412392655], 
"eval_len": [452, 241, 52, 1000, 255, 1000, 1000, 316, 1000, 251]}

100%|██████████| 1000000/1000000 [17:35:47<00:00, 15.79it/s]
