
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'ExtremeClogL1U23::markov(ord(15,1), ord(3,5,3,shift=22), 
[[124, 1], [1, 19]])'
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9997/1000000 [03:23<8:03:10, 34.15it/s]global step 10000, trans_decision ep_re 284.20779224708303

{"global_step": 10000, "eval_re": [190.88277168191377, 173.93044049672207, 
335.45119819387975, 401.8656367041278, 343.5589175887384, 194.26284064014843, 
337.18676782558333, 251.62765820818376, 156.9420938358094, 456.3695972957237], 
"eval_len": [36, 33, 63, 75, 65, 37, 63, 51, 30, 89]}

  2%|▏         | 19997/1000000 [10:04<8:03:06, 33.81it/s]global step 20000, trans_decision ep_re 235.24836876844992

{"global_step": 20000, "eval_re": [167.7683900805717, 161.88065903027456, 
377.5883686781276, 199.95726918538654, 151.26730154700834, 156.67565919896197, 
327.9175079881976, 301.372874398946, 151.24305766699956, 356.81259991002514], 
"eval_len": [32, 31, 71, 38, 29, 30, 62, 60, 29, 66]}

  3%|▎         | 29997/1000000 [16:41<7:48:15, 34.53it/s]global step 30000, trans_decision ep_re 278.3918281579803

{"global_step": 30000, "eval_re": [414.0559329812072, 146.427599081476, 
172.58738024238954, 236.99181891257925, 369.8810802134342, 220.39506840898082, 
293.6196916613, 193.89874367889598, 232.21178722517234, 503.84917917436707], 
"eval_len": [77, 28, 33, 45, 69, 42, 55, 37, 44, 103]}

  4%|▍         | 39996/1000000 [23:30<7:39:15, 34.84it/s]global step 40000, trans_decision ep_re 336.4920544531708

{"global_step": 40000, "eval_re": [359.15869210209115, 146.32449959916443, 
396.6495729724201, 382.64479909255124, 403.62581692317326, 151.33662800918563, 
382.36650692323303, 424.2006785346979, 358.39570981662814, 360.21764055856266], 
"eval_len": [67, 28, 73, 72, 74, 29, 72, 78, 66, 67]}

  5%|▍         | 49996/1000000 [29:57<7:37:01, 34.64it/s]global step 50000, trans_decision ep_re 230.76123972664445

{"global_step": 50000, "eval_re": [412.8288454715227, 194.0598641510401, 
125.2958726427059, 355.1455778280671, 141.08460173705188, 396.3983929284098, 
212.2699526397556, 157.00320225846124, 162.02397977503804, 151.502107834392], 
"eval_len": [77, 37, 24, 66, 27, 74, 40, 30, 31, 29]}

  6%|▌         | 59996/1000000 [36:34<7:42:18, 33.89it/s]global step 60000, trans_decision ep_re 319.76520825547016

{"global_step": 60000, "eval_re": [388.7003981974161, 141.11356913846024, 
404.6142799915171, 351.6830734429689, 367.3780047895334, 194.02356071310194, 
215.41286066474774, 372.88856380651055, 393.8398230113918, 367.9979487990539], 
"eval_len": [72, 27, 73, 65, 69, 37, 41, 69, 71, 68]}

  7%|▋         | 69996/1000000 [43:12<7:26:07, 34.74it/s]global step 70000, trans_decision ep_re 286.3171763586084

{"global_step": 70000, "eval_re": [391.2518037999797, 343.2621446566599, 
151.29867305881626, 144.98215679156726, 372.1358224895173, 413.11526644432445, 
328.68896848043795, 125.54225812030587, 397.4186279192407, 195.47604182523395], 
"eval_len": [72, 64, 29, 28, 68, 74, 62, 24, 72, 37]}

  8%|▊         | 79996/1000000 [50:00<7:18:35, 34.96it/s]global step 80000, trans_decision ep_re 279.1446369705892

{"global_step": 80000, "eval_re": [166.608090525127, 373.812345140814, 
146.25311455587703, 279.5833324774114, 146.79512839716034, 395.7199215958188, 
256.71608153581155, 408.7382590330364, 261.413919116359, 355.8061773284764], 
"eval_len": [32, 68, 28, 53, 28, 72, 48, 76, 49, 67]}

  9%|▉         | 89996/1000000 [56:23<7:13:18, 35.00it/s]global step 90000, trans_decision ep_re 342.74543501716187

{"global_step": 90000, "eval_re": [367.84836597575384, 166.942993696535, 
383.9746224838348, 330.55632316121495, 333.32179236495, 324.3424162733475, 
363.60618204432143, 286.0480483385635, 441.96080802397074, 428.85279780912714], 
"eval_len": [68, 32, 70, 62, 62, 61, 68, 55, 81, 75]}

 10%|▉         | 99996/1000000 [1:03:10<7:09:34, 34.92it/s]global step 100000, trans_decision ep_re 226.06620508747974

{"global_step": 100000, "eval_re": [177.48967448099893, 265.5752309439008, 
151.45096046887792, 140.2476402375607, 230.96716827721173, 398.59119799002934, 
193.010279244232, 226.16845378526827, 141.3457261834269, 335.8157192632908], 
"eval_len": [34, 50, 29, 27, 44, 72, 37, 43, 27, 63]}

 11%|█         | 109996/1000000 [1:09:50<7:15:16, 34.08it/s]global step 110000, trans_decision ep_re 293.2585491367206

{"global_step": 110000, "eval_re": [405.8665256103307, 129.78006580479374, 
360.02650836000333, 405.83865789166475, 398.855965474584, 389.75564267499675, 
140.92324302299087, 161.2877681877043, 393.9152001639902, 146.33591417614727], 
"eval_len": [76, 25, 68, 73, 72, 71, 27, 31, 72, 28]}

 12%|█▏        | 119996/1000000 [1:16:13<7:00:59, 34.84it/s]global step 120000, trans_decision ep_re 261.9066379517649

{"global_step": 120000, "eval_re": [226.71267629802526, 177.6560949948916, 
320.43449726273064, 378.17518224336044, 416.1817788674492, 136.24879702775343, 
369.16318508153944, 130.29503994876575, 130.55476706818715, 333.64436072494556],
"eval_len": [43, 34, 60, 70, 77, 26, 68, 25, 25, 62]}

 13%|█▎        | 129996/1000000 [1:23:00<6:56:51, 34.78it/s]global step 130000, trans_decision ep_re 294.57997082595887

{"global_step": 130000, "eval_re": [146.10060109719413, 135.59490324076023, 
336.30334114473504, 406.0654411932181, 388.79580636568517, 388.7497430893706, 
407.027475934236, 151.21816657675282, 161.9718244399212, 423.97240517771456], 
"eval_len": [28, 26, 63, 75, 71, 71, 77, 29, 31, 77]}

 14%|█▍        | 139996/1000000 [1:29:23<6:51:17, 34.85it/s]global step 140000, trans_decision ep_re 290.1760912725913

{"global_step": 140000, "eval_re": [354.4452703105999, 155.94384092125915, 
407.9405025311639, 151.24071038835288, 356.07708620930066, 150.9171125287762, 
450.9948783034175, 360.09341705204275, 377.9668599022144, 136.1412345787858], 
"eval_len": [66, 30, 75, 29, 65, 29, 82, 66, 69, 26]}

 15%|█▍        | 149996/1000000 [1:36:10<6:44:21, 35.03it/s]global step 150000, trans_decision ep_re 283.51615385727507

{"global_step": 150000, "eval_re": [171.87652913886478, 452.1052849632573, 
211.23149072143136, 408.9534441055329, 129.66800463175153, 389.6378996947042, 
130.76361706626994, 151.1486926245776, 414.52388088841803, 375.25269473794265], 
"eval_len": [33, 80, 40, 74, 25, 70, 25, 29, 75, 69]}

 16%|█▌        | 159996/1000000 [1:42:33<6:48:28, 34.27it/s]global step 160000, trans_decision ep_re 247.10461194369745

{"global_step": 160000, "eval_re": [171.8038637618864, 336.363726855565, 
141.01009023535624, 370.32095841613926, 249.8171181477464, 389.5001780605657, 
195.35447176643927, 298.4391219447336, 167.3821510880025, 151.05443916053977], 
"eval_len": [33, 62, 27, 69, 47, 74, 37, 57, 32, 29]}

 17%|█▋        | 169996/1000000 [1:49:20<6:31:36, 35.32it/s]global step 170000, trans_decision ep_re 309.5269022254812

{"global_step": 170000, "eval_re": [184.18224332924783, 468.41293178187834, 
337.7882931967013, 156.50266174817304, 342.52618653622545, 433.5809879290666, 
374.0647625700953, 156.46677054019597, 359.0084394362167, 282.7357451870113], 
"eval_len": [35, 86, 62, 30, 63, 79, 69, 30, 67, 53]}

 18%|█▊        | 179996/1000000 [1:55:44<6:31:13, 34.93it/s]global step 180000, trans_decision ep_re 302.65888141197996

{"global_step": 180000, "eval_re": [350.7946682383521, 145.9503522510797, 
402.88669393602333, 151.3094024761558, 302.3118653584549, 340.2546750823277, 
315.87104697348747, 371.49098940178345, 326.4468387000172, 319.27228170211816], 
"eval_len": [65, 28, 73, 29, 58, 64, 60, 69, 63, 60]}

 19%|█▉        | 189996/1000000 [2:02:30<6:25:41, 35.00it/s]global step 190000, trans_decision ep_re 285.36314864852733

{"global_step": 190000, "eval_re": [444.15820442151943, 473.6642480555479, 
162.84357442851342, 120.31124234628433, 428.6770157289113, 220.37368540526322, 
302.43966542063043, 371.1602255232344, 193.61884706218666, 136.38477809318246], 
"eval_len": [81, 87, 31, 23, 77, 42, 57, 70, 37, 26]}

 20%|█▉        | 199996/1000000 [2:08:55<6:22:22, 34.87it/s]global step 200000, trans_decision ep_re 212.47649078046143

{"global_step": 200000, "eval_re": [151.31689068263333, 188.32511435726536, 
157.1825216922829, 465.222610378117, 387.85960874661987, 151.50730283042813, 
140.61791669223223, 162.1045683017954, 136.07793395382618, 184.55044016941403], 
"eval_len": [29, 36, 30, 86, 71, 29, 27, 31, 26, 35]}

 21%|██        | 209996/1000000 [2:15:30<6:26:05, 34.10it/s]global step 210000, trans_decision ep_re 280.94060279835355

{"global_step": 210000, "eval_re": [156.14215153272684, 146.32086649936582, 
188.0659749480984, 140.84101663596041, 365.48219622303867, 406.4366403179643, 
399.3689134109126, 420.1045625311419, 429.5162266687773, 157.12747921554964], 
"eval_len": [30, 28, 36, 27, 68, 74, 74, 75, 76, 30]}

 22%|██▏       | 219996/1000000 [2:22:07<6:11:53, 34.96it/s]global step 220000, trans_decision ep_re 307.4832368482933

{"global_step": 220000, "eval_re": [381.7286098223719, 426.5312706223428, 
151.15533834339428, 172.8314276912499, 421.86371690685087, 480.2373872554899, 
135.80017742947797, 216.05915343573844, 376.5097811195996, 312.1155058564175], 
"eval_len": [73, 78, 29, 33, 78, 88, 26, 41, 69, 58]}

 23%|██▎       | 229996/1000000 [2:28:42<6:07:38, 34.91it/s]global step 230000, trans_decision ep_re 289.1227444650702

{"global_step": 230000, "eval_re": [204.8747056778449, 405.3611315399849, 
167.73013680261673, 399.2659437535875, 290.48518454251746, 266.0154965855626, 
372.52965667837, 188.77785402211472, 229.54669762632838, 366.64063742177433], 
"eval_len": [39, 75, 32, 74, 54, 50, 70, 36, 43, 67]}

 24%|██▍       | 239996/1000000 [2:35:30<6:01:37, 35.03it/s]global step 240000, trans_decision ep_re 376.71880923969735

{"global_step": 240000, "eval_re": [443.5928524589306, 151.336459167641, 
141.39867384097315, 442.5270813291341, 455.0821226535979, 369.52793790148763, 
421.1037022051074, 192.09115117991135, 392.84346902219045, 757.6846426380001], 
"eval_len": [80, 29, 27, 79, 82, 68, 77, 37, 71, 147]}

 25%|██▍       | 249996/1000000 [2:41:53<6:06:05, 34.14it/s]global step 250000, trans_decision ep_re 290.2605161695231

{"global_step": 250000, "eval_re": [524.4191038844909, 202.24890778591984, 
453.74978258853537, 507.3096167264372, 156.11938943289667, 141.32280766096184, 
176.36719416436577, 161.586434204496, 411.12606649188655, 168.35585875524077], 
"eval_len": [95, 39, 82, 89, 30, 27, 34, 31, 74, 32]}

 26%|██▌       | 259996/1000000 [2:48:40<6:01:34, 34.11it/s]global step 260000, trans_decision ep_re 370.90316484807755

{"global_step": 260000, "eval_re": [327.0279536313192, 469.9150098308592, 
650.8036754234329, 166.8283666769118, 194.01411264964938, 435.94673619999224, 
500.9229417127414, 392.5240793462817, 130.0401820065271, 441.0085910030602], 
"eval_len": [61, 85, 120, 32, 37, 78, 88, 73, 25, 79]}

 27%|██▋       | 269996/1000000 [2:55:04<5:48:24, 34.92it/s]global step 270000, trans_decision ep_re 254.37333132279537

{"global_step": 270000, "eval_re": [156.73495489257746, 200.20519089509446, 
475.5913977878837, 135.75201483566093, 395.63049017333566, 177.84332791406285, 
244.50964608269368, 135.54385290747814, 475.73820085257034, 146.1842368865964], 
"eval_len": [30, 38, 84, 26, 74, 34, 46, 26, 84, 28]}

 28%|██▊       | 279996/1000000 [3:01:50<5:40:13, 35.27it/s]global step 280000, trans_decision ep_re 329.21376959079714

{"global_step": 280000, "eval_re": [405.43713317210074, 285.56422040276885, 
426.11380884787457, 349.0642454232185, 177.06343664884417, 392.9886834147943, 
383.6371284195676, 221.87704066266267, 214.43111349838753, 435.96088541775276], 
"eval_len": [75, 56, 79, 66, 34, 75, 70, 42, 41, 79]}

 29%|██▉       | 289996/1000000 [3:08:15<5:37:47, 35.03it/s]global step 290000, trans_decision ep_re 297.7556012770191

{"global_step": 290000, "eval_re": [173.33719580814682, 243.0839969811664, 
193.96329110868976, 448.1440107497807, 156.99499732678967, 145.8192434459209, 
513.7301592538035, 195.6295688289818, 484.7666203002644, 422.0869289666465], 
"eval_len": [33, 46, 37, 82, 30, 28, 91, 37, 88, 77]}

 30%|██▉       | 299996/1000000 [3:14:50<5:39:38, 34.35it/s]global step 300000, trans_decision ep_re 404.8508377997831

{"global_step": 300000, "eval_re": [445.03009758066935, 414.28766218428547, 
220.7122430372969, 220.26149552615172, 140.7815934269655, 713.4967544494218, 
167.11842118750383, 657.5654976237537, 412.71066207064734, 656.5439509111354], 
"eval_len": [79, 75, 42, 42, 27, 125, 32, 117, 76, 115]}

 31%|███       | 309996/1000000 [3:21:26<5:26:34, 35.21it/s]global step 310000, trans_decision ep_re 381.26247871349995

{"global_step": 310000, "eval_re": [433.46726774956016, 596.3476925898937, 
238.60516723009133, 498.5347302897455, 404.13382940655686, 398.18055776977735, 
497.8383149886076, 156.5101946695227, 447.77135603062385, 141.2356764106205], 
"eval_len": [78, 110, 45, 90, 74, 72, 90, 30, 82, 27]}

 32%|███▏      | 319996/1000000 [3:28:01<5:23:43, 35.01it/s]global step 320000, trans_decision ep_re 320.11944160559557

{"global_step": 320000, "eval_re": [644.9940081885971, 135.74616675342452, 
415.3317946995304, 473.05641369736253, 493.2200508117746, 157.1269669036371, 
135.53535324499643, 454.4314524958885, 140.2799478001574, 151.4722614605869], 
"eval_len": [120, 26, 76, 87, 87, 30, 26, 82, 27, 29]}

 33%|███▎      | 329996/1000000 [3:34:36<5:19:51, 34.91it/s]global step 330000, trans_decision ep_re 362.02928334832507

{"global_step": 330000, "eval_re": [222.07243991307212, 265.1300604507162, 
579.4098338391282, 464.5887790240104, 211.22172466905636, 487.9690267867326, 
151.8001546016971, 437.73346160352355, 171.78337283199278, 628.5839797633215], 
"eval_len": [42, 50, 101, 84, 40, 86, 29, 82, 33, 110]}

 34%|███▍      | 339996/1000000 [3:41:12<5:13:28, 35.09it/s]global step 340000, trans_decision ep_re 288.7153505797184

{"global_step": 340000, "eval_re": [205.46558841275217, 466.60208177278423, 
156.52986698661527, 259.7583001766375, 448.1923140040049, 259.9195932672323, 
262.01772274987195, 270.13175318323243, 130.76809688221138, 427.768188361842], 
"eval_len": [39, 85, 30, 49, 81, 49, 49, 51, 25, 78]}

 35%|███▍      | 349996/1000000 [3:47:47<5:15:06, 34.38it/s]global step 350000, trans_decision ep_re 326.62539872365716

{"global_step": 350000, "eval_re": [413.0164528602895, 405.6169233416755, 
189.1188897294859, 502.31617347843434, 135.5012272068296, 399.0201943187319, 
459.203784567324, 146.1365656361904, 417.6497328606861, 198.67404323692404], 
"eval_len": [74, 76, 36, 90, 26, 76, 83, 28, 75, 38]}

 36%|███▌      | 359996/1000000 [3:54:22<5:04:40, 35.01it/s]global step 360000, trans_decision ep_re 305.3195002898507

{"global_step": 360000, "eval_re": [210.49560945496845, 331.03567781328314, 
403.40591079303965, 162.59329032260058, 146.4947316456247, 230.15287701802336, 
688.4634450328332, 151.61134666385885, 360.4828427408685, 368.45927141340576], 
"eval_len": [40, 62, 73, 31, 28, 44, 123, 29, 67, 65]}

 37%|███▋      | 369996/1000000 [4:00:57<4:58:55, 35.13it/s]global step 370000, trans_decision ep_re 383.81192583430555

{"global_step": 370000, "eval_re": [454.46638375929143, 415.866684174562, 
167.85390559386582, 461.14063336312324, 631.272125597098, 561.972205230027, 
537.9420785728942, 266.66918013767946, 178.3663679151522, 162.56969399936202], 
"eval_len": [80, 75, 32, 83, 111, 100, 96, 50, 34, 31]}

 38%|███▊      | 379996/1000000 [4:07:33<4:56:38, 34.84it/s]global step 380000, trans_decision ep_re 321.55799216918007

{"global_step": 380000, "eval_re": [439.88751345109927, 450.49965878682576, 
396.9249930084784, 204.1596695821071, 215.8076572146951, 146.49309175772441, 
156.98133867861813, 358.4749299972513, 398.5804566369604, 447.7706125780416], 
"eval_len": [78, 84, 71, 39, 41, 28, 30, 66, 73, 79]}

 39%|███▉      | 389996/1000000 [4:14:20<4:58:04, 34.11it/s]global step 390000, trans_decision ep_re 289.9493503581637

{"global_step": 390000, "eval_re": [200.88887680935792, 408.39366191737565, 
130.265210432915, 237.48689942320064, 451.51501181881565, 441.3409900291006, 
156.428944452139, 215.75159377525745, 490.25377408169055, 167.16854084178428], 
"eval_len": [38, 75, 25, 45, 80, 79, 30, 41, 86, 32]}

 40%|███▉      | 399996/1000000 [4:20:43<4:52:04, 34.24it/s]global step 400000, trans_decision ep_re 255.92255164678855

{"global_step": 400000, "eval_re": [451.4732187185989, 171.74580465504042, 
136.22921667892112, 171.72796176712004, 173.04076876794238, 125.44000365004976, 
395.95869172566165, 584.1162534966192, 193.31528675126708, 156.1783102566645], 
"eval_len": [82, 33, 26, 33, 33, 24, 74, 106, 37, 30]}

 41%|████      | 409996/1000000 [4:27:30<4:41:15, 34.96it/s]global step 410000, trans_decision ep_re 350.7473902573647

{"global_step": 410000, "eval_re": [253.63667434015008, 146.698769777746, 
460.8316824207679, 177.8577508833301, 403.5444690062049, 461.94895534856647, 
484.9098436444361, 504.00160790501457, 146.05499443148207, 467.98915481594884], 
"eval_len": [48, 28, 83, 34, 73, 82, 88, 91, 28, 83]}

 42%|████▏     | 419996/1000000 [4:33:53<4:35:03, 35.14it/s]global step 420000, trans_decision ep_re 295.7752968633505

{"global_step": 420000, "eval_re": [395.1732293910102, 136.08076886354934, 
542.1343361479494, 151.5096062645457, 389.9209485372675, 199.33463488017566, 
211.102184898832, 510.6912033550023, 228.07539336036854, 193.73066293480372], 
"eval_len": [74, 26, 97, 29, 72, 38, 40, 90, 43, 37]}

 43%|████▎     | 429996/1000000 [4:40:27<4:29:06, 35.30it/s]global step 430000, trans_decision ep_re 270.6559596583368

{"global_step": 430000, "eval_re": [174.05118411661024, 422.55584818886, 
161.68357047657486, 339.06454401211533, 124.89718406945859, 146.5887591520128, 
151.493939308891, 384.2827221072455, 387.62300494921885, 414.3188402023812], 
"eval_len": [33, 78, 31, 63, 24, 28, 29, 73, 72, 75]}

 44%|████▍     | 439996/1000000 [4:47:01<4:32:47, 34.21it/s]global step 440000, trans_decision ep_re 240.17194903258437

{"global_step": 440000, "eval_re": [156.60530931498278, 232.24461060418693, 
130.37530560019135, 247.2238671485117, 178.55907371429274, 141.34327431964465, 
234.67488071920576, 440.1734765674273, 417.6793786500469, 222.84031368735353], 
"eval_len": [30, 44, 25, 47, 34, 27, 44, 83, 77, 42]}

 45%|████▍     | 449996/1000000 [4:53:36<4:21:16, 35.08it/s]global step 450000, trans_decision ep_re 246.8875723864855

{"global_step": 450000, "eval_re": [402.3962397331348, 352.4379798073809, 
350.24951405777455, 301.20434422733535, 151.8964087957337, 141.0845807852716, 
205.9711706440738, 203.84643517526757, 147.15731343730096, 212.63173720158179], 
"eval_len": [74, 64, 67, 56, 29, 27, 39, 39, 28, 40]}

 46%|████▌     | 459996/1000000 [5:00:10<4:17:24, 34.96it/s]global step 460000, trans_decision ep_re 377.72404722555893

{"global_step": 460000, "eval_re": [201.10016677076732, 412.2340580596529, 
482.25360967763197, 446.46904417940993, 396.6556591783232, 435.5720551571884, 
429.0909423658285, 449.0718006429795, 130.74575985619452, 394.0473763676134], 
"eval_len": [38, 76, 84, 81, 76, 80, 78, 81, 25, 73]}

 47%|████▋     | 469996/1000000 [5:06:45<4:11:58, 35.06it/s]global step 470000, trans_decision ep_re 308.82047953597066

{"global_step": 470000, "eval_re": [384.87642228639066, 146.52005578757206, 
166.93050961881443, 273.22934656266375, 398.9774425682437, 463.7932319901141, 
460.0796329887174, 423.6721633726848, 239.29325929345976, 130.83273089104574], 
"eval_len": [71, 28, 32, 51, 73, 83, 82, 76, 45, 25]}

 48%|████▊     | 479996/1000000 [5:13:21<4:08:45, 34.84it/s]global step 480000, trans_decision ep_re 227.79360785624613

{"global_step": 480000, "eval_re": [291.7296940348476, 239.66652363844526, 
146.13035986332096, 141.20269733062025, 151.54379469696744, 435.8371216080793, 
145.98295595213594, 178.41788667650025, 314.5228695124147, 232.9021752491295], 
"eval_len": [55, 45, 28, 27, 29, 79, 28, 34, 59, 44]}

 49%|████▉     | 489996/1000000 [5:19:57<4:08:42, 34.18it/s]global step 490000, trans_decision ep_re 318.5080587097452

{"global_step": 490000, "eval_re": [394.911651722281, 415.24654388817265, 
335.00558456248854, 450.15905214856053, 261.15509278275783, 156.8910707914608, 
398.91587904537755, 173.87219444749417, 400.5399519975215, 198.38356571133716], 
"eval_len": [74, 74, 63, 81, 49, 30, 73, 33, 73, 38]}

 50%|████▉     | 499996/1000000 [5:26:31<3:57:04, 35.15it/s]global step 500000, trans_decision ep_re 249.50556749742572

{"global_step": 500000, "eval_re": [141.2329158411503, 168.22786111118373, 
413.3543749367531, 413.4779907628363, 177.92533940118096, 141.06607375027366, 
162.06549307340978, 141.603294002945, 188.31943755751396, 547.7828945370103], 
"eval_len": [27, 32, 74, 78, 34, 27, 31, 27, 36, 96]}

 51%|█████     | 509996/1000000 [5:33:06<3:54:43, 34.79it/s]global step 510000, trans_decision ep_re 306.79811695209435

{"global_step": 510000, "eval_re": [130.44888712530707, 152.11908440322387, 
188.15011754371835, 141.22375149764275, 619.44541185264, 361.13725431562085, 
437.142320967035, 473.205881556921, 429.4315927966488, 135.67686746218632], 
"eval_len": [25, 29, 36, 27, 105, 68, 81, 85, 76, 26]}

 52%|█████▏    | 519996/1000000 [5:39:50<3:47:20, 35.19it/s]global step 520000, trans_decision ep_re 357.6798570655486

{"global_step": 520000, "eval_re": [413.3582602901867, 167.49275640565656, 
384.73935942712944, 431.93620099577123, 424.1320352539344, 405.80764128588635, 
173.36422948758698, 463.3066685238639, 360.7826713380579, 351.8787476474129], 
"eval_len": [74, 32, 71, 78, 75, 73, 33, 84, 66, 65]}

 53%|█████▎    | 529996/1000000 [5:46:14<3:42:37, 35.19it/s]global step 530000, trans_decision ep_re 255.8612642923993

{"global_step": 530000, "eval_re": [162.26297493895459, 429.61663173226, 
152.41410039566702, 379.9589047469184, 195.16495326671523, 449.75310344088365, 
195.12805180763496, 270.6261119116187, 151.31414921707548, 172.3736614662654], 
"eval_len": [31, 77, 29, 71, 37, 80, 37, 51, 29, 33]}

 54%|█████▍    | 539996/1000000 [5:53:00<3:42:00, 34.53it/s]global step 540000, trans_decision ep_re 284.85178628877173

{"global_step": 540000, "eval_re": [419.37452001793986, 200.21169178785883, 
177.82592970371329, 391.79077760970307, 228.11980776293166, 614.3148700273897, 
136.0983938058982, 156.80829716008765, 140.95995109990199, 383.0136239122936], 
"eval_len": [77, 38, 34, 73, 43, 107, 26, 30, 27, 70]}

 55%|█████▍    | 549996/1000000 [5:59:23<3:34:40, 34.94it/s]global step 550000, trans_decision ep_re 272.16620643376353

{"global_step": 550000, "eval_re": [407.99609983212764, 146.09425244651013, 
377.4034217186806, 370.8829886681446, 189.27279314906053, 366.6847836619305, 
178.21143341066417, 168.32575650289635, 344.098416336379, 172.69211861124174], 
"eval_len": [75, 28, 70, 68, 36, 68, 34, 32, 63, 33]}

 56%|█████▌    | 559996/1000000 [6:05:58<3:28:50, 35.11it/s]global step 560000, trans_decision ep_re 268.8301496174653

{"global_step": 560000, "eval_re": [372.57818253539216, 194.98037175752216, 
481.1585977202048, 141.08936522653013, 214.47514893329213, 151.10481056159406, 
167.33990691948264, 157.26133234304012, 634.510277583157, 173.80350259443796], 
"eval_len": [68, 37, 82, 27, 41, 29, 32, 30, 110, 33]}

 57%|█████▋    | 569996/1000000 [6:12:31<3:23:16, 35.26it/s]global step 570000, trans_decision ep_re 231.47323723233086

{"global_step": 570000, "eval_re": [140.92810296197908, 387.22511413037, 
140.7299582900805, 157.42674944339115, 161.68864195933313, 167.896191057956, 
467.19810574159175, 403.29753764451965, 135.75521478071946, 152.58675631336757],
"eval_len": [27, 71, 27, 30, 31, 32, 85, 74, 26, 29]}

 58%|█████▊    | 579996/1000000 [6:19:04<3:19:47, 35.04it/s]global step 580000, trans_decision ep_re 346.71565370881837

{"global_step": 580000, "eval_re": [238.59579928605734, 580.0551027560456, 
529.6627810192322, 519.4258165409731, 235.8477688448915, 254.9066697716975, 
428.6358292678272, 130.515565030516, 371.3818390975891, 178.12936547335403], 
"eval_len": [45, 101, 96, 93, 45, 48, 80, 25, 69, 34]}

 59%|█████▉    | 589996/1000000 [6:25:43<3:18:27, 34.43it/s]global step 590000, trans_decision ep_re 267.6260957100719

{"global_step": 590000, "eval_re": [224.56333940393827, 399.68734255633984, 
166.76062901330226, 221.61176358356064, 479.400786690586, 223.39692915664253, 
151.09338289504836, 496.1473050358561, 166.9432039033507, 146.65627486209402], 
"eval_len": [43, 72, 32, 42, 86, 42, 29, 91, 32, 28]}

 60%|█████▉    | 599996/1000000 [6:32:16<3:11:15, 34.86it/s]global step 600000, trans_decision ep_re 338.29528558507775

{"global_step": 600000, "eval_re": [411.0331716096971, 172.89744339759775, 
156.92945134241913, 483.16254087308374, 445.3447374448778, 444.99815101102405, 
199.04586102079554, 478.1156412351982, 172.48022394865023, 418.9456339674339], 
"eval_len": [73, 33, 30, 87, 77, 79, 38, 83, 33, 76]}

 61%|██████    | 609996/1000000 [6:38:50<3:05:19, 35.08it/s]global step 610000, trans_decision ep_re 278.2242404052128

{"global_step": 610000, "eval_re": [440.02066252482933, 437.7269997238885, 
205.76511002650358, 384.9692934950629, 178.35625909026976, 125.58170559341175, 
163.1342815557412, 200.55106290137948, 135.98470006540128, 510.1523290756403], 
"eval_len": [79, 79, 39, 71, 34, 24, 31, 38, 26, 89]}

 62%|██████▏   | 619996/1000000 [6:45:24<3:00:17, 35.13it/s]global step 620000, trans_decision ep_re 256.5675835833316

{"global_step": 620000, "eval_re": [205.3462490664466, 509.3914617619678, 
157.17867199864568, 194.48167253197886, 167.56661701384783, 130.748850997686, 
151.6722543759208, 505.5075980656004, 402.4761720475795, 141.30628797364227], 
"eval_len": [39, 91, 30, 37, 32, 25, 29, 92, 73, 27]}

 63%|██████▎   | 629996/1000000 [6:52:10<2:58:39, 34.52it/s]global step 630000, trans_decision ep_re 340.6022996235512

{"global_step": 630000, "eval_re": [313.5446986021801, 325.04316426138814, 
333.71898584040883, 416.521942207529, 256.45976437471, 162.5936300533191, 
360.23506731198466, 463.26148366581083, 401.101607101931, 373.54265281624953], 
"eval_len": [59, 62, 62, 76, 48, 31, 66, 84, 73, 68]}

 64%|██████▍   | 639996/1000000 [6:58:32<2:50:40, 35.15it/s]global step 640000, trans_decision ep_re 317.3724008676684

{"global_step": 640000, "eval_re": [512.6854591804403, 512.6445937625946, 
217.54465305781235, 130.69963224031798, 314.7389863224574, 488.162425156134, 
177.6647724144593, 483.8822080759079, 173.1571119708342, 162.54416649572562], 
"eval_len": [92, 91, 41, 25, 59, 89, 34, 85, 33, 31]}

 65%|██████▍   | 649996/1000000 [7:05:06<2:46:13, 35.09it/s]global step 650000, trans_decision ep_re 355.13391397107017

{"global_step": 650000, "eval_re": [416.2891500873081, 575.0425279098573, 
157.29505046426044, 151.65802607203082, 141.2514960757556, 446.67039052858433, 
475.59492252354465, 461.79548532187493, 156.60680610365418, 569.1352846238312], 
"eval_len": [75, 104, 30, 29, 27, 81, 85, 84, 30, 98]}

 66%|██████▌   | 659996/1000000 [7:11:50<2:40:53, 35.22it/s]global step 660000, trans_decision ep_re 303.7234465903222

{"global_step": 660000, "eval_re": [205.35786581788977, 162.3193749187311, 
146.0078826314555, 228.29058212331392, 519.040494177462, 450.0869083326055, 
190.28692662832927, 372.29066893038777, 473.547969912299, 290.0057924307483], 
"eval_len": [39, 31, 28, 43, 91, 81, 36, 68, 85, 54]}

 67%|██████▋   | 669996/1000000 [7:18:14<2:39:35, 34.46it/s]global step 670000, trans_decision ep_re 299.39882756842434

{"global_step": 670000, "eval_re": [221.95150554100434, 220.69396744854188, 
285.7151015105151, 413.7666451420952, 344.6592885284453, 173.00486462054081, 
456.20972700775087, 238.22392350501278, 156.60094879026033, 483.1623035900773], 
"eval_len": [42, 42, 54, 74, 65, 33, 85, 45, 30, 87]}

 68%|██████▊   | 679996/1000000 [7:25:00<2:32:32, 34.96it/s]global step 680000, trans_decision ep_re 376.10617437606146

{"global_step": 680000, "eval_re": [380.9636713086146, 639.2344496137707, 
571.0382623759118, 194.11393449264187, 618.6221486925194, 212.78705717326085, 
243.59037142657172, 167.85278349600566, 571.1251393800773, 161.73392580124064], 
"eval_len": [69, 112, 101, 37, 109, 40, 46, 32, 101, 31]}

 69%|██████▉   | 689996/1000000 [7:31:23<2:27:02, 35.14it/s]global step 690000, trans_decision ep_re 255.4661016198317

{"global_step": 690000, "eval_re": [179.01188594312106, 168.69566911056393, 
201.57170945994105, 184.25307626032722, 512.546539099919, 271.8243178775721, 
146.41497693332025, 173.67296986989953, 569.9864954178388, 146.6833762258143], 
"eval_len": [34, 32, 38, 35, 91, 51, 28, 33, 101, 28]}

 70%|██████▉   | 699996/1000000 [7:37:57<2:22:39, 35.05it/s]global step 700000, trans_decision ep_re 403.05393367802833

{"global_step": 700000, "eval_re": [162.2228004059003, 624.7099627528771, 
612.8073681061861, 240.04000898424403, 542.5340798366807, 184.50679412399768, 
450.7123022298855, 469.42092115674956, 581.4857397437686, 162.09935943999383], 
"eval_len": [31, 110, 104, 45, 100, 35, 81, 83, 102, 31]}

 71%|███████   | 709996/1000000 [7:44:31<2:18:42, 34.84it/s]global step 710000, trans_decision ep_re 268.5235237901532

{"global_step": 710000, "eval_re": [141.42444145541955, 572.1647498791829, 
613.946686722509, 184.34810958026264, 152.20927149782972, 167.9519451797616, 
178.83764669329602, 162.67245970541495, 167.05447301292892, 344.62545417492635],
"eval_len": [27, 105, 106, 35, 29, 32, 34, 31, 32, 63]}

 72%|███████▏  | 719996/1000000 [7:51:05<2:13:17, 35.01it/s]global step 720000, trans_decision ep_re 274.45536475344755

{"global_step": 720000, "eval_re": [261.24454274313456, 146.5201620546491, 
179.23923552629918, 532.8121478806764, 391.8636891942814, 227.9173344368059, 
206.00449507417486, 440.38908243370804, 216.89204825258005, 141.6709099381666], 
"eval_len": [49, 28, 34, 92, 71, 43, 39, 79, 41, 27]}

 73%|███████▎  | 729996/1000000 [7:57:50<2:08:45, 34.95it/s]global step 730000, trans_decision ep_re 305.7634001273323

{"global_step": 730000, "eval_re": [517.7866546526176, 480.91561892591477, 
141.59504498059812, 276.6599839323452, 241.79502537317967, 134.30231004881688, 
522.3942474184569, 136.3133106256863, 167.30897418455547, 438.5628311311519], 
"eval_len": [92, 85, 27, 52, 46, 26, 95, 26, 32, 82]}

 74%|███████▍  | 739996/1000000 [8:04:13<2:04:24, 34.83it/s]global step 740000, trans_decision ep_re 298.58490307656473

{"global_step": 740000, "eval_re": [368.6402273249692, 477.85459234935956, 
504.8453633488973, 141.62539075932244, 220.97708477218825, 146.63650156930072, 
283.97797792544293, 145.8496592564266, 281.6555661195005, 413.78666734023915], 
"eval_len": [68, 90, 93, 27, 42, 28, 53, 28, 53, 75]}

 75%|███████▍  | 749996/1000000 [8:10:47<2:01:23, 34.33it/s]global step 750000, trans_decision ep_re 326.6675898559035

{"global_step": 750000, "eval_re": [156.41539468510953, 379.5025814290022, 
136.20743124603146, 453.89893990872065, 647.3851951117641, 194.91542931225806, 
527.0544908576127, 156.94695062929492, 468.47532715856397, 145.87415822067695], 
"eval_len": [30, 71, 26, 81, 112, 37, 96, 30, 86, 28]}

 76%|███████▌  | 759996/1000000 [8:17:21<1:54:35, 34.91it/s]global step 760000, trans_decision ep_re 378.7773943794642

{"global_step": 760000, "eval_re": [141.5148260120027, 435.67082567758126, 
467.89498398595725, 465.58467825582426, 524.2918082043429, 194.2295940743983, 
211.63289694314142, 400.5864600928596, 484.709127627987, 461.6587429205478], 
"eval_len": [27, 78, 84, 85, 99, 37, 40, 71, 85, 83]}

 77%|███████▋  | 769996/1000000 [8:23:55<1:48:58, 35.18it/s]global step 770000, trans_decision ep_re 261.53023262803777

{"global_step": 770000, "eval_re": [146.26618408669708, 400.55175036794776, 
156.84405210278456, 447.16750761706913, 368.6925742658875, 179.1511125619186, 
190.1884469523005, 219.6811147111114, 215.94606075440285, 290.8135228602581], 
"eval_len": [28, 75, 30, 80, 68, 34, 36, 42, 41, 54]}

 78%|███████▊  | 779996/1000000 [8:30:40<1:43:44, 35.35it/s]global step 780000, trans_decision ep_re 327.201421503157

{"global_step": 780000, "eval_re": [415.4867076350133, 473.8175718281683, 
184.64612850918937, 485.24168293631055, 477.62761499406196, 575.4800199611171, 
146.187997782246, 193.70995059502235, 199.56660839362507, 120.24993239681622], 
"eval_len": [75, 84, 35, 87, 86, 104, 28, 37, 38, 23]}

 79%|███████▉  | 789996/1000000 [8:37:02<1:41:11, 34.59it/s]global step 790000, trans_decision ep_re 286.5438563782728

{"global_step": 790000, "eval_re": [168.3552607479285, 145.66451735228162, 
135.86505507465554, 162.97228528238756, 379.7814867953142, 457.07167784902606, 
183.60547278302872, 412.0273592259379, 505.41553966757033, 314.67990900459733], 
"eval_len": [32, 28, 26, 31, 68, 82, 35, 76, 90, 58]}

 80%|███████▉  | 799996/1000000 [8:43:35<1:34:33, 35.25it/s]global step 800000, trans_decision ep_re 417.25229752152273

{"global_step": 800000, "eval_re": [522.5958687528233, 244.49862922300574, 
167.8251544737077, 503.9026462260999, 130.46208676750118, 472.01423390323987, 
613.4617807178773, 775.911112935981, 563.3581200984164, 178.49334211657492], 
"eval_len": [94, 46, 32, 90, 25, 86, 109, 137, 100, 34]}

 81%|████████  | 809996/1000000 [8:50:20<1:30:54, 34.83it/s]global step 810000, trans_decision ep_re 239.56229854725993

{"global_step": 810000, "eval_re": [183.2085825771336, 198.98677219587427, 
371.2124676427611, 216.6143761512316, 261.7014952187882, 179.88680528321456, 
354.15196693171305, 252.09014118402735, 215.8441410470997, 161.92623724075597], 
"eval_len": [35, 38, 68, 41, 49, 34, 67, 48, 41, 31]}

 82%|████████▏ | 819996/1000000 [8:56:44<1:25:50, 34.95it/s]global step 820000, trans_decision ep_re 362.1960938932589

{"global_step": 820000, "eval_re": [613.370324510347, 211.26055891060338, 
409.9111003157411, 413.21962092073403, 141.2224803985037, 151.8568637232069, 
223.7800915197769, 530.6602754561756, 458.4969829470762, 468.18264023042406], 
"eval_len": [112, 40, 74, 75, 27, 29, 42, 95, 82, 87]}

 83%|████████▎ | 829996/1000000 [9:03:30<1:22:30, 34.34it/s]global step 830000, trans_decision ep_re 343.2123855281144

{"global_step": 830000, "eval_re": [220.95444002235394, 156.74852027111007, 
150.60159459493343, 1003.6881013696202, 157.47253012101692, 131.10393141658398, 
466.9006187085914, 246.95470178817249, 161.73135275505868, 735.9680642337025], 
"eval_len": [42, 30, 29, 169, 30, 25, 84, 47, 31, 131]}

 84%|████████▍ | 839996/1000000 [9:09:55<1:17:52, 34.24it/s]global step 840000, trans_decision ep_re 253.30142713833735

{"global_step": 840000, "eval_re": [264.48457943355413, 231.96896937191983, 
194.6615248324861, 494.49084762174965, 212.00600707512803, 426.547103919275, 
152.84655414723252, 220.8505624507983, 151.37332357314568, 183.78479895808434], 
"eval_len": [50, 44, 37, 87, 40, 76, 29, 42, 29, 35]}

 85%|████████▍ | 849996/1000000 [9:16:40<1:11:34, 34.93it/s]global step 850000, trans_decision ep_re 190.43355717849843

{"global_step": 850000, "eval_re": [130.65157408490825, 344.0485816071289, 
179.0990596355675, 162.85817758128533, 135.56148853998988, 265.15948957051427, 
130.41742848810782, 125.40187001947109, 222.06981224887156, 209.0680900091397], 
"eval_len": [25, 64, 34, 31, 26, 50, 25, 24, 42, 39]}

 86%|████████▌ | 859996/1000000 [9:23:05<1:06:57, 34.85it/s]global step 860000, trans_decision ep_re 297.77356548102114

{"global_step": 860000, "eval_re": [166.88916297142413, 412.3820373358218, 
420.61169292294994, 146.3199875956432, 206.23781416754866, 849.7264730179375, 
178.19321925307267, 283.76417958339135, 156.6724946360559, 156.93859332636592], 
"eval_len": [32, 76, 76, 28, 39, 150, 34, 53, 30, 30]}

 87%|████████▋ | 869996/1000000 [9:29:50<1:01:56, 34.98it/s]global step 870000, trans_decision ep_re 318.8436411566733

{"global_step": 870000, "eval_re": [298.45398012086946, 125.07069919442417, 
474.5108889237941, 492.4555765524943, 255.3617966451365, 174.25243563776397, 
146.11327881259106, 494.7851724682445, 413.3175089073917, 314.11507430402366], 
"eval_len": [57, 24, 85, 90, 48, 33, 28, 90, 77, 59]}

 88%|████████▊ | 879997/1000000 [9:36:36<1:07:46, 29.51it/s]global step 880000, trans_decision ep_re 305.3182940232756

{"global_step": 880000, "eval_re": [168.30260534746768, 189.8229166003224, 
189.8848185629851, 248.23699520747243, 205.4453501717399, 403.6067336074452, 
500.1858080442458, 437.9994314305901, 227.34165265303713, 482.3566286074504], 
"eval_len": [32, 36, 36, 47, 39, 74, 88, 79, 43, 86]}

 89%|████████▉ | 889999/1000000 [9:44:04<51:13, 35.79it/s]global step 890000, trans_decision ep_re 382.64751340592227

{"global_step": 890000, "eval_re": [466.70179201922747, 151.5979092566134, 
195.32200377560898, 125.35832236794259, 493.62621343891533, 610.4273470029947, 
478.6653152414185, 563.950442002006, 266.24206180128783, 474.5837271532076], 
"eval_len": [85, 29, 37, 24, 87, 108, 86, 97, 50, 86]}

 90%|████████▉ | 899997/1000000 [9:50:57<52:58, 31.47it/s]global step 900000, trans_decision ep_re 336.8644592190585

{"global_step": 900000, "eval_re": [214.53270806793327, 173.1469657968263, 
227.96093150058564, 482.3382425320743, 465.3249681620114, 529.7709801409413, 
157.12898683030042, 445.4505433112867, 477.15535642622194, 195.83490942240394], 
"eval_len": [41, 33, 43, 88, 83, 93, 30, 83, 85, 37]}

 91%|█████████ | 909998/1000000 [9:57:52<42:14, 35.50it/s]global step 910000, trans_decision ep_re 322.76662008154113

{"global_step": 910000, "eval_re": [135.29401151061975, 856.1248423198012, 
540.9209071761504, 317.466403607342, 222.73456281615972, 222.90344903460903, 
141.26816637633092, 491.8579704605799, 162.88621880423926, 136.20966870957847], 
"eval_len": [26, 170, 97, 61, 42, 42, 27, 89, 31, 26]}

 92%|█████████▏| 919998/1000000 [10:11:31<1:22:56, 16.08it/s]global step 920000, trans_decision ep_re 234.54144511293276

{"global_step": 920000, "eval_re": [178.85466145096171, 412.26868807409954, 
167.70436504995882, 152.06896096679685, 206.23249469543845, 140.79486931622006, 
461.68323606899577, 157.35115599755193, 258.140555282654, 210.31546422665045], 
"eval_len": [34, 76, 32, 29, 39, 27, 81, 30, 49, 40]}

 93%|█████████▎| 929999/1000000 [10:37:24<1:38:47, 11.81it/s]global step 930000, trans_decision ep_re 303.2350651414946

{"global_step": 930000, "eval_re": [302.12248848980016, 381.3007963247852, 
141.4309921209614, 614.6908246261326, 141.81399113791264, 173.24426564751562, 
546.4360690116421, 298.63279670093755, 244.28253295421752, 188.3958944010409], 
"eval_len": [57, 69, 27, 108, 27, 33, 91, 56, 46, 36]}

 94%|█████████▍| 939997/1000000 [10:56:56<28:24, 35.20it/s]global step 940000, trans_decision ep_re 342.74975912667134

{"global_step": 940000, "eval_re": [565.9295070473293, 130.91272012611407, 
556.84219735756, 427.61871694686073, 151.7806501978519, 194.02283273402037, 
627.2254188994908, 168.55963103120877, 419.76103524441896, 184.8448816818584], 
"eval_len": [94, 25, 102, 79, 29, 37, 114, 32, 75, 35]}

 95%|█████████▍| 949997/1000000 [11:03:27<23:15, 35.84it/s]global step 950000, trans_decision ep_re 276.25717492445494

{"global_step": 950000, "eval_re": [234.63225958941356, 179.2622130501143, 
140.8133941490878, 435.8386016825748, 244.25795222921252, 158.1911862950306, 
256.2041395622535, 391.2259851480791, 141.1300434660587, 581.0159740727245], 
"eval_len": [44, 34, 27, 79, 46, 30, 48, 69, 27, 111]}

 96%|█████████▌| 959997/1000000 [11:10:00<18:40, 35.70it/s]global step 960000, trans_decision ep_re 284.24745382512936

{"global_step": 960000, "eval_re": [451.236306535904, 162.30466317459147, 
124.82413351381686, 433.1376353979319, 485.81638850603713, 446.59864148315705, 
189.25490977023546, 217.30560651366852, 201.1228113390114, 130.8734420169401], 
"eval_len": [80, 31, 24, 78, 87, 80, 36, 41, 38, 25]}

 97%|█████████▋| 969996/1000000 [11:16:10<13:57, 35.83it/s]global step 970000, trans_decision ep_re 365.2593908995122

{"global_step": 970000, "eval_re": [964.0336099996235, 222.55180778170566, 
198.8374622942453, 286.986445935277, 147.11564316214248, 424.3492957947832, 
588.5831237217153, 183.9914960518123, 130.2018418959736, 505.94318235784345], 
"eval_len": [190, 42, 38, 53, 28, 77, 106, 35, 25, 88]}

 98%|█████████▊| 979996/1000000 [11:22:32<09:18, 35.81it/s]global step 980000, trans_decision ep_re 397.7312078601809

{"global_step": 980000, "eval_re": [135.97409167067417, 429.50326943140715, 
157.24355809106893, 425.31446570372475, 411.0717732194184, 359.19756612145113, 
497.8734659238332, 546.1889834797496, 500.918061726076, 514.0268432344058], 
"eval_len": [26, 80, 30, 77, 74, 66, 87, 94, 87, 89]}

 99%|█████████▉| 989996/1000000 [11:28:55<04:38, 35.95it/s]global step 990000, trans_decision ep_re 336.66973406390247

{"global_step": 990000, "eval_re": [511.2523781319024, 228.3396940174223, 
278.7324555246318, 426.23221848281554, 418.7431899120712, 453.9103427374008, 
125.21956154559524, 387.3191926034842, 406.58260917444505, 130.36569850925554], 
"eval_len": [89, 43, 52, 78, 75, 82, 24, 70, 73, 25]}

100%|█████████▉| 999996/1000000 [11:35:16<00:00, 35.82it/s]global step 1000000, trans_decision ep_re 395.24885745599397

{"global_step": 1000000, "eval_re": [365.44685476055423, 347.53547348621015, 
397.3348914057823, 243.93731075202626, 157.2645284265766, 151.95443689841613, 
306.9218257276134, 1007.2745062170127, 386.42712808400967, 588.3916188017376], 
"eval_len": [67, 64, 72, 46, 30, 29, 57, 180, 70, 105]}

100%|██████████| 1000000/1000000 [11:35:29<00:00, 23.96it/s]
