
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.1
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9997/1000000 [04:40<11:12:13, 24.55it/s]global step 10000, trans_decision ep_re 427.8470445645401

{"global_step": 10000, "eval_re": [391.31631988620705, 335.3953368592012, 
457.29900583239015, 601.3412795101516, 444.13618586726767, 416.23591490679416, 
432.7291962583223, 594.1731661874599, 410.290909093008, 195.5531312445986], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 424]}

  2%|▏         | 19998/1000000 [14:10<10:47:30, 25.22it/s]global step 20000, trans_decision ep_re 307.66590793994953

{"global_step": 20000, "eval_re": [334.87221894596337, 450.0323235568483, 
19.02997930187007, 518.3477607184701, 58.990813799689846, 441.1927977333111, 
46.116532960043024, 520.2273729962844, 598.7471164558232, 89.10216293119197], 
"eval_len": [578, 1000, 31, 1000, 82, 1000, 66, 1000, 1000, 117]}

  3%|▎         | 29997/1000000 [23:20<10:56:56, 24.61it/s]global step 30000, trans_decision ep_re 633.8238061060493

{"global_step": 30000, "eval_re": [723.5290639180851, 726.1520590517836, 
612.6499179195424, 699.3819751042284, 698.8518618108708, 653.5728454207983, 
802.2887934017672, 80.92253819571307, 608.3750908523741, 732.51391538533], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 109, 1000, 1000]}

  4%|▍         | 39998/1000000 [32:50<10:37:49, 25.09it/s]global step 40000, trans_decision ep_re 389.35882058833676

{"global_step": 40000, "eval_re": [602.0318186103219, 757.6396615782758, 
794.0023356694984, 230.95060296703127, 18.45408029886741, 739.1144059999347, 
418.15153965683095, 34.50673561026025, 224.2400408094267, 74.49698468291976], 
"eval_len": [617, 1000, 937, 262, 31, 1000, 419, 61, 281, 87]}

  5%|▍         | 49998/1000000 [42:00<10:39:03, 24.78it/s]global step 50000, trans_decision ep_re 220.12928051690787

{"global_step": 50000, "eval_re": [23.77088737317512, 715.6368653390354, 
144.94003430119142, 80.27142611989998, 60.41871186850668, 48.95632774884805, 
740.5054242960607, 97.87605845222203, 114.48812230296588, 174.42894736717363], 
"eval_len": [31, 1000, 132, 85, 63, 60, 1000, 96, 96, 153]}

  6%|▌         | 59997/1000000 [51:00<10:39:42, 24.49it/s]global step 60000, trans_decision ep_re 364.7848301924481

{"global_step": 60000, "eval_re": [178.88210702197304, 333.2695407276324, 
277.4813566199097, 31.429950098938097, 416.688947190843, 265.7990710985078, 
778.6818329222089, 195.90168168410764, 988.0760640005694, 181.63775055979156], 
"eval_len": [196, 414, 313, 38, 417, 250, 1000, 244, 1000, 237]}

  7%|▋         | 69998/1000000 [1:00:10<10:17:06, 25.12it/s]global step 70000, trans_decision ep_re 422.4292918853027

{"global_step": 70000, "eval_re": [350.92259324796714, 695.0401722718253, 
664.0915048406264, 408.0207483715712, 335.9525422182598, 101.5844199078292, 
423.4795021748191, 394.3140204960816, 151.31307140527002, 699.5743439187769], 
"eval_len": [413, 1000, 1000, 403, 385, 111, 436, 530, 193, 731]}

  8%|▊         | 79997/1000000 [1:09:20<10:12:10, 25.05it/s]global step 80000, trans_decision ep_re 253.68385668794

{"global_step": 80000, "eval_re": [236.3628712189724, 113.17128475796915, 
649.3867147154427, 185.02402664664868, 295.58373787583645, 440.1753806624529, 
21.17558492704994, 296.7212258231785, 174.29424934903943, 124.9434909028097], 
"eval_len": [204, 86, 512, 169, 277, 388, 34, 279, 160, 85]}

  9%|▉         | 89997/1000000 [1:18:20<10:15:00, 24.66it/s]global step 90000, trans_decision ep_re 474.4420333945692

{"global_step": 90000, "eval_re": [100.402877364061, 306.15495825468906, 
224.49086893093508, 412.3947481255478, 703.187788136626, 531.8671354014122, 
1080.625617782396, 869.4468830870665, 442.025026638711, 73.82443022424721], 
"eval_len": [104, 240, 176, 359, 1000, 417, 1000, 798, 336, 77]}

 10%|▉         | 99999/1000000 [1:27:40<10:11:20, 24.54it/s]global step 100000, trans_decision ep_re 428.4488767655107

{"global_step": 100000, "eval_re": [120.05078348348754, 430.41349557269075, 
103.6126451561518, 41.348010568695614, 323.3106798472885, 1192.8624035767693, 
410.59438709660725, 1018.4208018521972, 47.98752511433722, 595.888035386882], 
"eval_len": [108, 1000, 80, 50, 275, 1000, 367, 852, 51, 429]}

 11%|█         | 109999/1000000 [1:36:50<9:53:18, 25.00it/s]global step 110000, trans_decision ep_re 237.21587002336446

{"global_step": 110000, "eval_re": [149.59984867786457, 669.198198756099, 
438.95064276897295, 278.9145203478737, 366.4663750409708, 124.02866003597485, 
210.3988333338784, 35.311329144718364, 35.15673512696103, 64.13355700033138], 
"eval_len": [152, 1000, 377, 212, 380, 103, 165, 44, 41, 72]}

 12%|█▏        | 119998/1000000 [1:45:50<9:51:41, 24.79it/s]global step 120000, trans_decision ep_re 364.0489541752658

{"global_step": 120000, "eval_re": [168.85532879653275, 235.1742822541681, 
373.60908304818815, 745.8362649655153, 390.57684790969483, 99.75509181075874, 
483.45256240826234, 258.98093850240457, 207.83616418841905, 676.4129778687144], 
"eval_len": [162, 168, 320, 532, 291, 105, 425, 215, 247, 1000]}

 13%|█▎        | 129998/1000000 [1:55:00<9:37:55, 25.09it/s]global step 130000, trans_decision ep_re 407.78834838071026

{"global_step": 130000, "eval_re": [352.89620203108814, 687.1418506958372, 
43.70093094033665, 150.40091867567904, 527.5570383236274, 150.69535436689162, 
163.80971625907426, 690.7146178897024, 447.7946413093623, 863.1722133155038], 
"eval_len": [255, 1000, 49, 103, 366, 183, 137, 1000, 308, 1000]}

 14%|█▍        | 139998/1000000 [2:04:10<9:37:53, 24.80it/s]global step 140000, trans_decision ep_re 176.58919106257068

{"global_step": 140000, "eval_re": [72.18413409597107, 208.0380113033235, 
159.01691290465965, 35.60228509347035, 112.52678210769973, 387.0929912337844, 
126.14249519421641, 120.23500781975468, 161.3970497563378, 383.65624111648907], 
"eval_len": [74, 199, 236, 43, 111, 296, 132, 98, 116, 339]}

 15%|█▍        | 149997/1000000 [2:13:10<9:37:26, 24.53it/s]global step 150000, trans_decision ep_re 606.4319659655366

{"global_step": 150000, "eval_re": [105.10929911293645, 451.06116391066746, 
1387.385780666777, 761.9235124688252, 189.20030486841344, 998.5549067365478, 
273.79492208226304, 742.6056369124516, 881.4793184895054, 273.20481440697887], 
"eval_len": [96, 308, 1000, 1000, 168, 1000, 207, 1000, 718, 211]}

 16%|█▌        | 159999/1000000 [2:22:30<9:33:02, 24.43it/s]global step 160000, trans_decision ep_re 464.25775109143007

{"global_step": 160000, "eval_re": [220.30832294643895, 546.7936434941832, 
900.9184922008923, 531.9911042636061, 217.47023456199628, 419.30653591641294, 
763.8459482360748, 727.7142687672598, 62.99680772685964, 251.23215280057687], 
"eval_len": [206, 372, 628, 410, 172, 311, 543, 479, 54, 181]}

 17%|█▋        | 169999/1000000 [2:31:40<9:22:19, 24.60it/s]global step 170000, trans_decision ep_re 642.0446932651984

{"global_step": 170000, "eval_re": [56.375859790993154, 998.5427518812998, 
633.1161681172589, 766.58720946023, 1076.846708028041, 1120.997928816711, 
336.02901503314047, 915.6765708496836, 451.2319379645176, 65.0427827101071], 
"eval_len": [51, 1000, 495, 528, 873, 1000, 249, 1000, 320, 70]}

 18%|█▊        | 179998/1000000 [2:40:50<9:11:46, 24.77it/s]global step 180000, trans_decision ep_re 357.1804552763529

{"global_step": 180000, "eval_re": [176.48174131798888, 297.9338254185605, 
937.3397139580094, 351.87451082064666, 348.47685875703394, 81.98692264696065, 
220.76246386049124, 700.6770378832484, 120.37969186188411, 335.89178623870515], 
"eval_len": [149, 207, 661, 269, 294, 73, 179, 1000, 123, 275]}

 19%|█▉        | 189999/1000000 [2:50:00<9:08:58, 24.59it/s]global step 190000, trans_decision ep_re 597.1445516284873

{"global_step": 190000, "eval_re": [353.29406143732615, 802.8186654263557, 
396.8412076770717, 173.60041606434308, 792.3804834251191, 35.356856503550496, 
477.5120400133495, 654.6328758381126, 1522.8169791432335, 762.191930756411], 
"eval_len": [240, 1000, 312, 127, 525, 46, 381, 1000, 1000, 449]}

 20%|█▉        | 199999/1000000 [2:59:10<9:00:28, 24.67it/s]global step 200000, trans_decision ep_re 418.39503836124834

{"global_step": 200000, "eval_re": [209.5639815936952, 927.737034332548, 
273.68259008245377, 376.8952159782521, 87.45298908134605, 153.03382404730903, 
772.9858666846225, 579.1102109742632, 649.86232284487, 153.62634799312352], 
"eval_len": [170, 1000, 162, 301, 61, 91, 451, 347, 403, 147]}

 21%|██        | 209999/1000000 [3:08:10<8:45:56, 25.03it/s]global step 210000, trans_decision ep_re 433.04463310807495

{"global_step": 210000, "eval_re": [170.17453114376318, 311.9313516955001, 
248.48861069200925, 980.8414496646792, 119.4875283581923, 291.625683355066, 
228.24398207249993, 206.19260451126692, 875.4609133113489, 897.9996762764243], 
"eval_len": [102, 174, 191, 1000, 82, 251, 185, 124, 545, 1000]}

 22%|██▏       | 219999/1000000 [3:17:10<8:41:33, 24.93it/s]global step 220000, trans_decision ep_re 499.96233816509994

{"global_step": 220000, "eval_re": [460.5191080588864, 1510.4380200365806, 
320.5491922328031, 274.0920770966153, 320.8230694491403, 358.5824651469174, 
133.13080859854597, 115.05663819660495, 1133.4802188523174, 372.95178398258827],
"eval_len": [313, 1000, 206, 196, 245, 309, 106, 81, 1000, 245]}

 23%|██▎       | 229999/1000000 [3:26:10<8:40:25, 24.66it/s]global step 230000, trans_decision ep_re 211.27753938215764

{"global_step": 230000, "eval_re": [152.4482020694562, 95.4920136250444, 
57.85906264783566, 242.28964986965389, 97.05480299258268, 130.6841226230665, 
760.1360843910832, 339.0748289166879, 156.87883497832604, 80.85779170784004], 
"eval_len": [128, 87, 50, 168, 71, 101, 1000, 255, 90, 58]}

 24%|██▍       | 239998/1000000 [3:35:00<8:17:18, 25.47it/s]global step 240000, trans_decision ep_re 282.9057128659443

{"global_step": 240000, "eval_re": [417.73077226709745, 53.397191897736626, 
721.8390731902396, 288.45926057749153, 71.93248207959655, 105.68279102483102, 
165.57976005719783, 115.02363799035399, 755.2984764581798, 134.11368311671816], 
"eval_len": [351, 59, 535, 213, 49, 94, 135, 78, 483, 105]}

 25%|██▍       | 249998/1000000 [3:43:50<8:08:35, 25.58it/s]global step 250000, trans_decision ep_re 318.3670831777757

{"global_step": 250000, "eval_re": [606.9549580940959, 846.9727445980608, 
56.748228000013206, 177.99102817268587, 410.08875860661533, 603.5569317222765, 
113.24000275313698, 100.58362597176904, 183.40475951262965, 84.12979434647396], 
"eval_len": [376, 1000, 51, 129, 378, 331, 76, 80, 143, 73]}

 26%|██▌       | 259999/1000000 [3:52:40<8:10:56, 25.12it/s]global step 260000, trans_decision ep_re 524.6069086762487

{"global_step": 260000, "eval_re": [293.06507105796396, 832.1788080955847, 
376.4337334214354, 801.5075574666597, 737.8192627500131, 357.7497608251124, 
102.17028511333683, 251.27190881288544, 967.3600623384453, 526.5126368810508], 
"eval_len": [240, 1000, 300, 1000, 428, 229, 88, 176, 1000, 339]}

 27%|██▋       | 269999/1000000 [4:01:50<8:00:56, 25.30it/s]global step 270000, trans_decision ep_re 539.6460590654042

{"global_step": 270000, "eval_re": [284.9017848115159, 1130.0036600482124, 
107.3745995228302, 448.1741746171498, 46.42783858278868, 916.7764290680022, 
208.96090174130993, 1028.632583280055, 132.7087050557413, 1092.4999139264366], 
"eval_len": [221, 652, 125, 340, 49, 575, 166, 1000, 134, 1000]}

 28%|██▊       | 279999/1000000 [4:10:50<7:59:29, 25.03it/s]global step 280000, trans_decision ep_re 272.80845208877867

{"global_step": 280000, "eval_re": [67.18923021969421, 120.33335424884562, 
135.45377303972583, 755.7467543903576, 264.3164053757752, 496.218735165377, 
100.39537189057576, 138.93897613427225, 534.214200661614, 115.27771976154905], 
"eval_len": [58, 79, 99, 1000, 180, 312, 69, 139, 375, 103]}

 29%|██▉       | 289998/1000000 [4:19:40<7:43:55, 25.51it/s]global step 290000, trans_decision ep_re 595.6542676691763

{"global_step": 290000, "eval_re": [1350.462161070226, 147.8153731277754, 
755.8363327184123, 301.0073188767646, 1373.6278628238063, 929.9260696475625, 
329.82015840782515, 282.7570519771287, 221.5923964601433, 263.6979515821184], 
"eval_len": [1000, 138, 1000, 187, 1000, 1000, 258, 169, 142, 179]}

 30%|██▉       | 299997/1000000 [4:28:40<7:42:53, 25.20it/s]global step 300000, trans_decision ep_re 311.42645187641136

{"global_step": 300000, "eval_re": [232.3913933242276, 82.65460817009412, 
82.93029615349019, 399.22876872520374, 571.8111495613579, 117.73619685900043, 
1253.3396861091774, 215.84360338327974, 55.99614896562555, 102.33266751265717], 
"eval_len": [179, 66, 69, 243, 408, 96, 909, 161, 58, 60]}

 31%|███       | 309999/1000000 [4:37:30<7:35:16, 25.26it/s]global step 310000, trans_decision ep_re 314.6017634002932

{"global_step": 310000, "eval_re": [210.38906203761385, 355.67483549343905, 
390.2192865377746, 145.1033007662203, 162.48869169393845, 120.12390094982898, 
1161.7308969551464, 382.64540713807935, 132.24257737317916, 85.39967505771143], 
"eval_len": [130, 264, 264, 127, 95, 96, 601, 183, 126, 69]}

 32%|███▏      | 319999/1000000 [4:46:30<7:31:50, 25.08it/s]global step 320000, trans_decision ep_re 399.0691530641676

{"global_step": 320000, "eval_re": [303.7021416191004, 395.64158786827477, 
118.08804098753347, 812.3052377569664, 222.6086872518549, 1289.2695664683065, 
32.954924824552535, 180.3317431414964, 498.594437641302, 137.19516308228876], 
"eval_len": [192, 285, 83, 592, 146, 901, 40, 128, 393, 85]}

 33%|███▎      | 329997/1000000 [4:55:20<7:27:16, 24.97it/s]global step 330000, trans_decision ep_re 1028.7571460748736

{"global_step": 330000, "eval_re": [823.4260048962325, 1103.2925479683838, 
1529.0793406863475, 511.52504810990285, 1270.1762213671518, 1519.949842715576, 
1570.6210351068758, 1383.941061139454, 338.38878081236544, 237.1715779464459], 
"eval_len": [515, 775, 1000, 337, 1000, 1000, 1000, 1000, 195, 158]}

 34%|███▍      | 339999/1000000 [5:04:40<7:17:27, 25.14it/s]global step 340000, trans_decision ep_re 495.7582871225933

{"global_step": 340000, "eval_re": [628.0589467418607, 735.4739917054583, 
722.4674956503736, 410.8018404629623, 124.78134289344817, 885.9389893904103, 
718.8481875116552, 142.68172967304162, 523.2629887998575, 65.26735839686567], 
"eval_len": [403, 413, 418, 225, 73, 1000, 396, 88, 383, 73]}

 35%|███▍      | 349999/1000000 [5:13:40<7:19:15, 24.66it/s]global step 350000, trans_decision ep_re 619.6238213144652

{"global_step": 350000, "eval_re": [1378.3867312878688, 472.65152863411674, 
604.6152903687489, 241.62255586274904, 288.5028923158393, 948.1172869607719, 
324.62359419037034, 691.8356342325112, 130.16676206675658, 1115.7159372249191], 
"eval_len": [1000, 301, 480, 153, 217, 691, 231, 500, 88, 740]}

 36%|███▌      | 359998/1000000 [5:22:40<7:00:22, 25.37it/s]global step 360000, trans_decision ep_re 847.967443068292

{"global_step": 360000, "eval_re": [439.6924177697506, 1419.3202551563102, 
282.33134966627597, 1333.2123209424415, 1453.0375000331608, 859.9874398454846, 
383.91996206689964, 1244.0518807577348, 499.7071751598326, 564.4141292850302], 
"eval_len": [295, 1000, 226, 786, 1000, 530, 272, 887, 342, 522]}

 37%|███▋      | 369999/1000000 [5:31:40<6:53:43, 25.38it/s]global step 370000, trans_decision ep_re 828.778220864125

{"global_step": 370000, "eval_re": [546.6424796841108, 1867.1144941809173, 
280.55949766617766, 695.7138526404797, 37.3887191056462, 1019.0632316069056, 
823.5095901619571, 1675.160315822807, 864.1644102986102, 478.4656174736393], 
"eval_len": [357, 1000, 211, 394, 46, 588, 438, 1000, 580, 282]}

 38%|███▊      | 379998/1000000 [5:40:50<6:49:58, 25.20it/s]global step 380000, trans_decision ep_re 945.7512390422968

{"global_step": 380000, "eval_re": [867.745390507625, 813.3869972762454, 
504.5163182768371, 55.80478966581777, 675.0726685125927, 1659.847831457902, 
789.43312551284, 1741.1039705464607, 1303.1719819568386, 1047.42931670981], 
"eval_len": [1000, 1000, 359, 63, 482, 1000, 472, 1000, 726, 605]}

 39%|███▉      | 389997/1000000 [5:50:00<6:45:51, 25.05it/s]global step 390000, trans_decision ep_re 453.27296919385026

{"global_step": 390000, "eval_re": [163.71893317815153, 354.61942974631546, 
825.6438784327798, 108.27183518077749, 367.08333040521757, 1074.6553636560936, 
264.9137354885373, 860.1358188883918, 164.7384382153753, 348.94892874686315], 
"eval_len": [158, 281, 581, 92, 231, 733, 178, 515, 113, 289]}

 40%|███▉      | 399999/1000000 [5:59:00<6:34:31, 25.35it/s]global step 400000, trans_decision ep_re 416.3517321698009

{"global_step": 400000, "eval_re": [395.1801764198472, 257.69752570082034, 
277.2489039587979, 135.3692714515519, 472.601677333287, 961.9186115637142, 
73.25568471395808, 195.44059636871654, 349.64686750037436, 1045.1580066869421], 
"eval_len": [337, 127, 168, 165, 274, 1000, 72, 111, 232, 550]}

 41%|████      | 409997/1000000 [6:07:50<6:27:54, 25.35it/s]global step 410000, trans_decision ep_re 389.72519562561774

{"global_step": 410000, "eval_re": [685.0201938914021, 227.50619640271643, 
140.80055432089074, 189.8712117708925, 67.55082802216383, 661.9579071577242, 
87.46163035501623, 506.26268503623197, 1082.4650913455025, 248.35565795363692], 
"eval_len": [340, 134, 86, 130, 59, 438, 69, 309, 1000, 168]}

 42%|████▏     | 419999/1000000 [6:16:50<6:24:32, 25.14it/s]global step 420000, trans_decision ep_re 560.0214579676269

{"global_step": 420000, "eval_re": [498.78368953062994, 855.5968665018163, 
146.59754207357037, 686.5929631443173, 574.6453958180433, 145.11476004846295, 
45.70327594396925, 1833.1251264965904, 789.8874103281199, 24.167549790750286], 
"eval_len": [289, 1000, 106, 470, 395, 109, 53, 1000, 410, 27]}

 43%|████▎     | 429999/1000000 [6:25:40<6:17:28, 25.17it/s]global step 430000, trans_decision ep_re 877.8056557522193

{"global_step": 430000, "eval_re": [1038.5387212715334, 958.694055198613, 
521.3934904087079, 549.8806666521283, 1318.5368074636167, 1127.1930913405306, 
998.3558783348591, 391.31749625331577, 1049.9327953207767, 824.2135552781123], 
"eval_len": [1000, 1000, 403, 1000, 794, 791, 625, 235, 672, 536]}

 44%|████▍     | 439999/1000000 [6:34:50<6:16:29, 24.79it/s]global step 440000, trans_decision ep_re 330.1224125761501

{"global_step": 440000, "eval_re": [144.55139134963713, 194.31677016410976, 
180.47142207295119, 155.66729464939837, 308.3787516572326, 611.0695206561721, 
666.8560851523298, 603.4937138157684, 335.00034258981395, 101.41883365408775], 
"eval_len": [99, 137, 108, 106, 187, 366, 335, 324, 253, 97]}

 45%|████▍     | 449999/1000000 [6:43:50<6:04:06, 25.18it/s]global step 450000, trans_decision ep_re 1047.3358106531682

{"global_step": 450000, "eval_re": [1658.7317786567346, 1114.0308255762543, 
1727.262503893442, 1560.5813674755225, 1203.992251375018, 377.99030768762736, 
148.86092856416155, 1756.7272763173867, 77.31276809187888, 847.8680988936566], 
"eval_len": [861, 694, 1000, 1000, 1000, 240, 93, 1000, 56, 594]}

 46%|████▌     | 459999/1000000 [6:52:50<5:57:18, 25.19it/s]global step 460000, trans_decision ep_re 1162.879067985565

{"global_step": 460000, "eval_re": [1635.0473489652995, 1717.4573878439426, 
800.1455212404026, 1809.6595269352451, 1056.0204436534057, 738.1737107302006, 
1838.447174833148, 1554.9572043758624, 67.121407984783, 411.76095329336033], 
"eval_len": [1000, 1000, 475, 1000, 660, 1000, 1000, 1000, 59, 259]}

 47%|████▋     | 469999/1000000 [7:02:10<5:52:57, 25.03it/s]global step 470000, trans_decision ep_re 440.4396702117795

{"global_step": 470000, "eval_re": [491.4723376112263, 385.7730564561776, 
401.0537142497239, 936.596877934337, 235.92481225525526, 326.73164019641047, 
148.83431169664473, 228.31071188758565, 722.9343395080869, 526.7649003223473], 
"eval_len": [342, 228, 1000, 1000, 174, 290, 103, 147, 1000, 1000]}

 48%|████▊     | 479998/1000000 [7:11:10<5:37:48, 25.66it/s]global step 480000, trans_decision ep_re 495.19512655694314

{"global_step": 480000, "eval_re": [177.63129079231265, 991.0986443884843, 
165.19391548323097, 89.90440882066328, 640.227641724516, 677.1417641455553, 
1841.7293087562061, 87.46271316553324, 196.45099126969782, 85.11058702323233], 
"eval_len": [86, 606, 130, 59, 319, 418, 1000, 70, 141, 59]}

 49%|████▉     | 489999/1000000 [7:20:00<5:37:33, 25.18it/s]global step 490000, trans_decision ep_re 1039.8841883585017

{"global_step": 490000, "eval_re": [1022.3412498101026, 1661.840325623947, 
736.4823399931096, 764.0524329814696, 238.5067929447028, 577.7515148650101, 
1726.1490226285332, 1643.830639303976, 312.97722402351303, 1714.9103414106548], 
"eval_len": [616, 1000, 411, 509, 203, 321, 1000, 1000, 189, 1000]}

 50%|████▉     | 499997/1000000 [7:29:10<5:31:38, 25.13it/s]global step 500000, trans_decision ep_re 401.74881708481905

{"global_step": 500000, "eval_re": [1106.8208033646192, 101.7128696018115, 
785.5641934697179, 205.38146509523932, 113.75788399185716, 21.279003022690958, 
435.0720761396816, 291.90421399968346, 141.87663723641813, 814.119024926471], 
"eval_len": [654, 62, 505, 127, 90, 37, 272, 148, 104, 1000]}

 51%|█████     | 509999/1000000 [7:38:10<5:22:33, 25.32it/s]global step 510000, trans_decision ep_re 587.2896102758618

{"global_step": 510000, "eval_re": [471.56027366864356, 1481.1613935919156, 
1007.1401122538704, 292.54920603538466, 288.7265702889914, 1031.4369226098258, 
150.37205018159833, 128.9562847917861, 390.55080486543017, 630.4424844711723], 
"eval_len": [294, 783, 598, 181, 228, 1000, 88, 91, 322, 1000]}

 52%|█████▏    | 519998/1000000 [7:47:10<5:19:27, 25.04it/s]global step 520000, trans_decision ep_re 520.1073838139333

{"global_step": 520000, "eval_re": [162.2124560681733, 827.1153737407793, 
214.4179537927779, 1130.9989005194595, 1233.2781502111477, 215.59712394145023, 
295.8815103347297, 347.2176973105141, 357.99831728251394, 416.35635493778705], 
"eval_len": [144, 1000, 173, 812, 1000, 143, 185, 207, 211, 266]}

 53%|█████▎    | 529998/1000000 [7:56:10<5:07:57, 25.44it/s]global step 530000, trans_decision ep_re 776.6733198501672

{"global_step": 530000, "eval_re": [858.7820740369772, 931.5575479491862, 
337.8581629659245, 1619.9696388289115, 209.50217734198972, 99.39436122133982, 
615.7075181395555, 1018.0853444391274, 416.45595985735963, 1659.4204137213003], 
"eval_len": [484, 572, 204, 1000, 140, 113, 328, 614, 223, 1000]}

 54%|█████▍    | 539997/1000000 [8:05:10<5:04:08, 25.21it/s]global step 540000, trans_decision ep_re 422.32510290026374

{"global_step": 540000, "eval_re": [424.9177700121268, 755.901190559098, 
354.8859003928894, 145.63228133854105, 149.66459831823752, 309.9661286413647, 
500.23928617329807, 387.338580233348, 224.95113105583707, 969.7541622778972], 
"eval_len": [259, 461, 1000, 111, 81, 199, 326, 205, 1000, 523]}

 55%|█████▍    | 549999/1000000 [8:14:10<4:57:30, 25.21it/s]global step 550000, trans_decision ep_re 515.4008659167737

{"global_step": 550000, "eval_re": [178.99472203913314, 1644.2926173970852, 
66.77141100834496, 448.287384906553, 426.55662548165196, 236.33583667736505, 
833.1154173272282, 92.8363714416942, 1063.1719724921995, 163.6463003964809], 
"eval_len": [140, 1000, 83, 260, 284, 179, 1000, 122, 1000, 123]}

 56%|█████▌    | 559999/1000000 [8:23:20<4:53:18, 25.00it/s]global step 560000, trans_decision ep_re 483.6968616452805

{"global_step": 560000, "eval_re": [862.1725882446814, 516.9045204344259, 
581.1406701433922, 203.93581739601314, 241.27091291945052, 874.1619531459924, 
337.9330154161333, 237.3103496047519, 381.86830286638747, 600.2704862815771], 
"eval_len": [546, 1000, 356, 143, 154, 562, 233, 163, 226, 367]}

 57%|█████▋    | 569999/1000000 [8:32:20<4:45:23, 25.11it/s]global step 570000, trans_decision ep_re 665.8571510109132

{"global_step": 570000, "eval_re": [86.72524576167397, 655.4182901669553, 
958.2750650197889, 253.9720480383731, 1005.3346824754868, 608.1212200255192, 
952.3829531907431, 1563.9232557103094, 499.8032959495006, 74.6154537707816], 
"eval_len": [58, 1000, 639, 188, 679, 406, 610, 1000, 303, 75]}

 58%|█████▊    | 579997/1000000 [8:41:20<4:36:54, 25.28it/s]global step 580000, trans_decision ep_re 553.2823461859564

{"global_step": 580000, "eval_re": [596.9894923513738, 316.40524003726216, 
314.94502870906604, 1411.7932181522951, 128.56382532326865, 1359.8993318020266, 
572.8361578871899, 85.85874507269708, 714.6501196123787, 30.882302912005237], 
"eval_len": [311, 183, 251, 1000, 78, 1000, 298, 61, 422, 42]}

 59%|█████▉    | 589997/1000000 [8:50:20<4:29:18, 25.37it/s]global step 590000, trans_decision ep_re 514.4844053954182

{"global_step": 590000, "eval_re": [995.1613119904581, 174.60143091934287, 
642.4922518037843, 218.34259012178117, 487.63509674232614, 797.9478890123781, 
226.84039898711032, 112.2034358553895, 356.20000052540263, 1133.41964799621], 
"eval_len": [522, 122, 1000, 132, 324, 1000, 146, 64, 229, 710]}

 60%|█████▉    | 599997/1000000 [8:59:20<4:29:12, 24.76it/s]global step 600000, trans_decision ep_re 196.72918156168

{"global_step": 600000, "eval_re": [228.88707739121375, 120.96071920547638, 
101.9313110337159, 213.0656233101948, 109.13215904787008, 322.0233549393033, 
65.28744124627124, 368.8132216468282, 95.95377277068515, 341.23713502524134], 
"eval_len": [183, 87, 153, 152, 87, 1000, 51, 328, 68, 1000]}

 61%|██████    | 609997/1000000 [9:08:20<4:19:36, 25.04it/s]global step 610000, trans_decision ep_re 153.59809737864623

{"global_step": 610000, "eval_re": [105.47331941212364, 56.68980521181907, 
249.0317527230727, 207.38810452810264, 169.15895438004839, 76.81509398908206, 
295.0847649383784, 216.07444102717025, 71.64418721574263, 88.6205503609224], 
"eval_len": [58, 98, 155, 1000, 147, 81, 199, 191, 121, 64]}

 62%|██████▏   | 619999/1000000 [9:17:10<4:11:34, 25.18it/s]global step 620000, trans_decision ep_re 98.01608678776873

{"global_step": 620000, "eval_re": [128.1209953065458, 312.48616409011106, 
137.97440660103027, 9.294493740433161, 40.42593586784574, 164.66247090687108, 
161.87482215464155, -280.18841864924144, 192.122847692359, 113.38715016709114], 
"eval_len": [131, 1000, 88, 107, 52, 115, 133, 1000, 94, 117]}

 63%|██████▎   | 629998/1000000 [9:26:10<4:03:41, 25.31it/s]global step 630000, trans_decision ep_re 114.59550261104243

{"global_step": 630000, "eval_re": [89.27139994527542, 111.38314669416208, 
37.81182045135921, 127.81017756028416, 188.75727654562218, 123.81251386800501, 
80.0930629614303, 75.58632003450928, 232.12823778542014, 79.30107026435648], 
"eval_len": [85, 248, 50, 957, 1000, 97, 107, 58, 164, 90]}

 64%|██████▍   | 639999/1000000 [9:35:10<3:58:40, 25.14it/s]global step 640000, trans_decision ep_re 32.57254442235434

{"global_step": 640000, "eval_re": [25.719120916033656, 34.95603591334159, 
-146.114771802378, -14.413896806079773, 41.00825440829121, 185.1872847723318, 
70.03361251873784, 80.86487130738497, 90.81900595385093, -42.334072957970896], 
"eval_len": [81, 227, 1000, 89, 138, 1000, 125, 173, 199, 316]}

 65%|██████▍   | 649997/1000000 [9:44:00<3:49:53, 25.37it/s]global step 650000, trans_decision ep_re 118.97070523551686

{"global_step": 650000, "eval_re": [67.71651166111434, 57.898678076533265, 
158.660884178046, 126.09226180504147, 50.487802121406304, 180.67130542579275, 
276.37337866831086, 114.1408512970705, 65.09267535141281, 92.57270377044043], 
"eval_len": [64, 60, 1000, 143, 50, 1000, 251, 73, 61, 102]}

 66%|██████▌   | 659998/1000000 [9:53:00<3:47:29, 24.91it/s]global step 660000, trans_decision ep_re -25.453464467636902

{"global_step": 660000, "eval_re": [118.64292589340435, 97.83133059643255, 
28.645825383167026, -779.728781868117, 51.77087729780525, 38.199130617301016, 
55.3385944794316, 68.2604629297975, 41.974376878300184, 24.53061311610859], 
"eval_len": [409, 109, 50, 1000, 47, 48, 75, 85, 53, 37]}

 67%|██████▋   | 669998/1000000 [10:01:50<3:36:45, 25.37it/s]global step 670000, trans_decision ep_re 73.50977165276409

{"global_step": 670000, "eval_re": [332.54261607385865, 51.97925778225459, 
18.02346500170733, 20.482762797569283, 81.83990560729501, 66.2446480975868, 
17.98350634946182, 28.198911744913804, 93.14024041189369, 24.662402661099847], 
"eval_len": [1000, 47, 57, 33, 121, 73, 39, 36, 116, 90]}

 68%|██████▊   | 679997/1000000 [10:10:40<3:32:26, 25.11it/s]global step 680000, trans_decision ep_re 72.18025697768807

{"global_step": 680000, "eval_re": [27.801877881822097, 59.44332479357002, 
43.94398730560632, 32.1334719118326, 30.185246357923095, 41.49613980850711, 
28.926093091259432, 203.35274113854732, 18.256659800815243, 236.2630276869975], 
"eval_len": [36, 52, 93, 133, 129, 49, 53, 1000, 34, 477]}

 69%|██████▉   | 689999/1000000 [10:19:30<3:26:25, 25.03it/s]global step 690000, trans_decision ep_re 241.2054801068652

{"global_step": 690000, "eval_re": [470.9945236655393, -3.6799705831287572, 
333.6340724545258, 195.49343918826372, 246.56521438810069, 330.52528957403877, 
170.21803039714922, 168.74871732767863, 456.7438634726265, 42.811621183858264], 
"eval_len": [1000, 1000, 1000, 314, 1000, 397, 367, 1000, 1000, 72]}

 70%|██████▉   | 699999/1000000 [10:28:40<3:17:41, 25.29it/s]global step 700000, trans_decision ep_re 164.578140564497

{"global_step": 700000, "eval_re": [90.14993089617619, -144.23940099949291, 
144.58626638293575, 580.14732849988, -317.7185476227883, 427.7508220960932, 
90.6639466530127, -86.53925330000168, 420.85530000392225, 440.12501303523334], 
"eval_len": [114, 1000, 139, 1000, 1000, 1000, 72, 1000, 1000, 433]}

 71%|███████   | 709999/1000000 [10:37:50<3:12:20, 25.13it/s]global step 710000, trans_decision ep_re 167.67002386571133

{"global_step": 710000, "eval_re": [110.9292058177054, 124.93840442998594, 
21.78212080797698, 286.2969163901976, 190.7167783469254, -316.0893913676474, 
255.03069492118198, 187.8777765821713, 425.1679358362948, 390.04979689232124], 
"eval_len": [159, 162, 50, 272, 174, 1000, 347, 229, 1000, 1000]}

 72%|███████▏  | 719999/1000000 [10:46:50<3:05:43, 25.13it/s]global step 720000, trans_decision ep_re 196.74589625960644

{"global_step": 720000, "eval_re": [64.86135900050834, 362.5640415749744, 
166.80145342017065, -45.330197523409524, 404.08321936834267, 65.4625617821178, 
648.5762210787944, 211.20216325565573, -129.0079220253932, 218.24606266430277], 
"eval_len": [57, 265, 144, 1000, 441, 70, 1000, 219, 1000, 166]}

 73%|███████▎  | 729999/1000000 [10:55:50<2:57:04, 25.41it/s]global step 730000, trans_decision ep_re 316.236259534602

{"global_step": 730000, "eval_re": [92.80349501147593, 117.59756868596823, 
519.8116621391929, 143.0857902438062, 349.0658796250763, 438.8622933478416, 
106.88890430025448, 561.8315390297622, 638.1654388718806, 194.25002409076154], 
"eval_len": [93, 145, 1000, 108, 1000, 1000, 126, 1000, 1000, 199]}

 74%|███████▍  | 739998/1000000 [11:05:00<2:53:19, 25.00it/s]global step 740000, trans_decision ep_re 434.9058453257445

{"global_step": 740000, "eval_re": [162.68003960619902, 405.6040581513277, 
651.7271959792334, 222.12347283007372, 349.98998247989215, 886.123317733163, 
693.795798055508, 317.59522885609914, 479.46244496619306, 179.95691459975546], 
"eval_len": [139, 407, 499, 155, 244, 1000, 686, 220, 331, 117]}

 75%|███████▍  | 749998/1000000 [11:14:00<2:46:28, 25.03it/s]global step 750000, trans_decision ep_re 176.24647494138944

{"global_step": 750000, "eval_re": [39.92660091614222, 68.66825646391287, 
81.62061599329719, 40.270922803897946, 459.8539517172248, 48.40942533528749, 
123.82721737292233, 87.20211490208287, 574.3139124714404, 238.3717314376862], 
"eval_len": [50, 49, 123, 51, 1000, 52, 100, 69, 1000, 220]}

 76%|███████▌  | 759999/1000000 [11:23:00<2:38:14, 25.28it/s]global step 760000, trans_decision ep_re 243.0281278821486

{"global_step": 760000, "eval_re": [120.39527999955901, 91.9834363858563, 
221.65402975287748, 54.71631177877486, 149.49718130902764, 494.44362045645937, 
136.16434082088023, 457.2288550392249, 233.61259862157385, 470.5856246572527], 
"eval_len": [73, 81, 168, 48, 128, 1000, 99, 1000, 261, 1000]}

 77%|███████▋  | 769999/1000000 [11:31:50<2:32:22, 25.16it/s]global step 770000, trans_decision ep_re 231.04201936545255

{"global_step": 770000, "eval_re": [199.12902488802462, 91.10510074652835, 
110.57698775264913, 158.95140659378757, 739.3158967922475, 69.47776369043835, 
367.7379912316378, 372.1988708279303, 98.26629610745094, 103.66085502383122], 
"eval_len": [292, 108, 134, 179, 1000, 66, 1000, 365, 103, 105]}

 78%|███████▊  | 779999/1000000 [11:40:50<2:26:32, 25.02it/s]global step 780000, trans_decision ep_re 247.86220606893298

{"global_step": 780000, "eval_re": [140.6459239821071, 206.5485873145115, 
443.76958321011955, 183.41771439990924, 394.99309429966536, 62.31708472396483, 
465.88119204717503, 115.85941052138762, 389.1436807032733, 76.0457894872164], 
"eval_len": [81, 183, 1000, 170, 1000, 64, 1000, 100, 1000, 74]}

 79%|███████▉  | 789999/1000000 [11:50:00<2:20:33, 24.90it/s]global step 790000, trans_decision ep_re 122.99171706546556

{"global_step": 790000, "eval_re": [106.65969383262569, 94.11185440436896, 
82.9310777277838, 119.90666569504583, 88.73719230350036, 105.91743353875665, 
70.42205297195163, 480.47485669675314, 56.549320546268376, 24.207022937601412], 
"eval_len": [69, 90, 107, 78, 71, 140, 63, 1000, 47, 47]}

 80%|███████▉  | 799997/1000000 [11:58:50<2:12:15, 25.20it/s]global step 800000, trans_decision ep_re 134.29374934811733

{"global_step": 800000, "eval_re": [116.33269246402806, 105.72861370657, 
61.57750269193208, 103.97923760377977, 119.11619479660062, 43.083023419782954, 
75.50646902671232, 133.18904316860883, 502.442722204541, 81.9819943986176], 
"eval_len": [118, 88, 78, 136, 96, 58, 80, 118, 1000, 60]}

 81%|████████  | 809999/1000000 [12:07:50<2:04:58, 25.34it/s]global step 810000, trans_decision ep_re 259.9649028458598

{"global_step": 810000, "eval_re": [77.49873985702818, 44.68422045931031, 
153.94918045700715, 92.51257415579691, 126.31060109016262, 612.7845981276583, 
194.55245953432387, 657.9657733235041, 52.2720156119124, 587.1188658418941], 
"eval_len": [68, 44, 130, 83, 132, 1000, 219, 1000, 46, 1000]}

 82%|████████▏ | 819998/1000000 [12:16:50<1:57:47, 25.47it/s]global step 820000, trans_decision ep_re 194.82336003384776

{"global_step": 820000, "eval_re": [95.39204527038817, 47.695080479057644, 
354.62975111263705, 122.83591216206987, 54.09450780089011, 410.3118353845469, 
34.59109946495741, 35.654541157076174, 177.70578894136315, 615.3230385654911], 
"eval_len": [83, 48, 1000, 69, 72, 375, 39, 50, 141, 1000]}

 83%|████████▎ | 829999/1000000 [12:25:50<1:51:24, 25.43it/s]global step 830000, trans_decision ep_re 435.8510733356413

{"global_step": 830000, "eval_re": [687.8614524472027, 269.2455117983618, 
327.6218027847789, 129.83588352515196, 262.2279997200546, 188.49851003190545, 
427.02897094335725, 716.2501891391715, 1189.8999073664204, 160.04050560000795], 
"eval_len": [414, 196, 251, 105, 1000, 117, 284, 1000, 732, 135]}

 84%|████████▍ | 839998/1000000 [12:34:50<1:45:40, 25.24it/s]global step 840000, trans_decision ep_re 233.40710515208352

{"global_step": 840000, "eval_re": [213.35112505272684, 245.8738295563209, 
74.20309590391427, 123.29368329163144, 235.69248334881914, 230.87283615891832, 
139.516678307757, 223.64003369680535, 133.3576576045973, 714.2696285993449], 
"eval_len": [187, 171, 62, 107, 199, 164, 82, 131, 128, 419]}

 85%|████████▍ | 849997/1000000 [12:43:40<1:38:49, 25.30it/s]global step 850000, trans_decision ep_re 516.9170914380733

{"global_step": 850000, "eval_re": [220.89208209468663, 532.7339893603091, 
956.692607060801, 383.0424070714908, 752.5660955832527, 50.299131514248685, 
550.4968186319453, 556.8011122709447, 589.7655076082563, 575.8811631847975], 
"eval_len": [256, 1000, 1000, 844, 1000, 89, 1000, 1000, 1000, 676]}

 86%|████████▌ | 859999/1000000 [12:53:00<1:32:36, 25.19it/s]global step 860000, trans_decision ep_re 499.9225945881509

{"global_step": 860000, "eval_re": [297.9514252997075, 415.8655388367029, 
352.38033316883826, 258.2149448153616, 832.4429171223936, 160.39514869333615, 
255.2444617726946, 524.1772211264563, 439.7680180838458, 1462.7859369621724], 
"eval_len": [236, 251, 234, 172, 1000, 98, 235, 342, 367, 851]}

 87%|████████▋ | 869999/1000000 [13:02:00<1:25:07, 25.45it/s]global step 870000, trans_decision ep_re 463.1702681538407

{"global_step": 870000, "eval_re": [193.94711928923527, 196.29665415747587, 
310.6292296753953, 364.1337966625291, 1260.2220950636136, 205.2550052727781, 
202.83467685340943, 62.06977604449776, 641.7083112727182, 1194.6060172467542], 
"eval_len": [116, 109, 300, 288, 837, 127, 135, 47, 444, 757]}

 88%|████████▊ | 879999/1000000 [13:10:50<1:21:13, 24.62it/s]global step 880000, trans_decision ep_re 615.4121643385502

{"global_step": 880000, "eval_re": [241.71774945202876, 293.66005331429545, 
1332.6128217651312, 555.6429574226005, 1486.004388865384, 104.15567706707071, 
1142.0115542954097, 578.444266657102, 289.4894338956022, 130.38274065087785], 
"eval_len": [203, 177, 777, 338, 1000, 76, 672, 360, 166, 107]}

 89%|████████▉ | 889999/1000000 [13:19:50<1:12:31, 25.28it/s]global step 890000, trans_decision ep_re 762.9292664294242

{"global_step": 890000, "eval_re": [593.7525513103739, 1522.5068618568314, 
547.9333261950866, 770.1804244816183, 799.6737764583393, 622.704466228051, 
1530.8054885373003, 485.80990815706934, 561.6596529301856, 194.26620813938518], 
"eval_len": [459, 1000, 433, 1000, 505, 424, 1000, 1000, 1000, 176]}

 90%|████████▉ | 899997/1000000 [13:29:00<1:05:42, 25.37it/s]global step 900000, trans_decision ep_re 508.80973022895876

{"global_step": 900000, "eval_re": [457.16926816734207, 338.92585103240845, 
122.34946818808464, 177.53871054207272, 1547.620096431962, 50.10411460713116, 
196.93566630013936, 1504.743445549731, 313.01939028126725, 379.6912911894485], 
"eval_len": [1000, 218, 74, 142, 1000, 48, 136, 996, 222, 290]}

 91%|█████████ | 909997/1000000 [13:38:00<59:51, 25.06it/s]global step 910000, trans_decision ep_re 330.02338801054316

{"global_step": 910000, "eval_re": [325.7966423650294, 556.7199556047394, 
496.892114460658, 631.4873828508726, 158.786561985026, 281.5690073351234, 
335.9956378238571, 103.20450318053918, 195.91969880890665, 213.86237569067987], 
"eval_len": [287, 1000, 1000, 1000, 142, 277, 269, 101, 171, 218]}

 92%|█████████▏| 919998/1000000 [13:47:00<51:49, 25.73it/s]global step 920000, trans_decision ep_re 513.265273334079

{"global_step": 920000, "eval_re": [126.94055816194656, 784.8173354979115, 
762.2178900526542, 151.52778951048774, 1073.8468051244288, 177.28449118838137, 
745.031807799311, 73.30430603018183, 425.38331879845646, 812.2984311770307], 
"eval_len": [98, 631, 1000, 132, 745, 135, 1000, 69, 316, 1000]}

 93%|█████████▎| 929998/1000000 [13:56:00<45:44, 25.50it/s]global step 930000, trans_decision ep_re 526.2528835728336

{"global_step": 930000, "eval_re": [439.4038132078901, 77.12355220093501, 
779.2715439934236, 545.0267052406977, 884.4846730595693, 364.1597164753464, 
883.180645517352, 230.0939424790934, 677.4737622219794, 382.31048133205], 
"eval_len": [280, 122, 1000, 382, 1000, 296, 1000, 137, 1000, 317]}

 94%|█████████▍| 939998/1000000 [14:05:00<38:45, 25.80it/s]global step 940000, trans_decision ep_re 814.9440917239071

{"global_step": 940000, "eval_re": [1485.487082332591, 449.5385720230141, 
639.2587237007322, 425.5962189299346, 366.6890742163115, 383.39040850550055, 
1501.3243128051604, 1354.309573720743, 366.42637083364565, 1177.4205801714388], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949999/1000000 [14:14:20<33:06, 25.17it/s]global step 950000, trans_decision ep_re 567.3652170111397

{"global_step": 950000, "eval_re": [330.42837247622197, 735.3865159147142, 
661.8797080165123, 769.7654982699474, 528.2853039345531, 271.01083763688086, 
53.26641295236125, 766.9074229916852, 277.4371132098611, 1279.2849847086602], 
"eval_len": [309, 526, 1000, 643, 1000, 171, 72, 579, 223, 1000]}

 96%|█████████▌| 959998/1000000 [14:23:20<25:56, 25.70it/s]global step 960000, trans_decision ep_re 997.097624895306

{"global_step": 960000, "eval_re": [1475.0661015740056, 951.8982579013913, 
461.3012563978185, 1540.1931304262248, 1146.171877272004, 57.85344819732121, 
1584.6970348696889, 600.8399508065075, 544.7546571788824, 1608.2005343292174], 
"eval_len": [1000, 1000, 311, 1000, 1000, 66, 1000, 414, 411, 1000]}

 97%|█████████▋| 969998/1000000 [14:32:30<19:20, 25.85it/s]global step 970000, trans_decision ep_re 417.04334601231005

{"global_step": 970000, "eval_re": [1574.161153543858, 142.7996058968639, 
595.7776527812928, 141.4820840085551, 363.08945902160633, 305.44184813520883, 
572.6239738410627, 151.71028332265556, 149.52482113201842, 173.82257843997877], 
"eval_len": [1000, 79, 403, 80, 206, 180, 1000, 120, 108, 150]}

 98%|█████████▊| 979999/1000000 [14:41:20<13:13, 25.22it/s]global step 980000, trans_decision ep_re 476.9416629719759

{"global_step": 980000, "eval_re": [273.315665274127, 297.78215514286717, 
290.7440317594562, 43.290272590111314, 676.7072682605935, 162.7509612439666, 
655.8373436296671, 209.29857466423394, 1039.8779448869727, 1119.8124122677639], 
"eval_len": [189, 1000, 208, 42, 472, 123, 377, 111, 726, 676]}

 99%|█████████▉| 989997/1000000 [14:50:20<06:38, 25.12it/s]global step 990000, trans_decision ep_re 422.5390885124587

{"global_step": 990000, "eval_re": [784.9694295888223, 638.9044229929644, 
639.7131143782507, 358.9778882712306, 562.5674700614678, 147.4390939027634, 
182.0418677431303, 253.79046508799627, 265.4170168591039, 391.5701162388568], 
"eval_len": [480, 1000, 409, 233, 371, 130, 124, 176, 197, 228]}

100%|█████████▉| 999998/1000000 [14:59:20<00:00, 25.93it/s]global step 1000000, trans_decision ep_re 716.153159568073

{"global_step": 1000000, "eval_re": [103.8735573485096, 1291.921247794934, 
539.4549652549624, 1143.870039562378, 1530.116028088378, 161.14213828355693, 
43.1146253133645, 1696.2155904523827, 193.5400564698789, 458.28334711238466], 
"eval_len": [88, 949, 329, 1000, 1000, 98, 43, 1000, 133, 324]}

100%|██████████| 1000000/1000000 [14:59:33<00:00, 18.53it/s]
