
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.1
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [05:20<12:16:36, 22.40it/s]global step 10000, trans_decision ep_re 175.0554670743094

{"global_step": 10000, "eval_re": [136.3019874107964, 150.57137780073703, 
245.63067619029522, 129.81500736022264, 125.09400508012602, 166.99478413874482, 
108.22693088010837, 250.43882884604253, 287.98079793996044, 149.50027509606056],
"eval_len": [26, 29, 46, 25, 24, 32, 21, 48, 56, 29]}

  2%|▏         | 19999/1000000 [15:30<12:11:38, 22.32it/s]global step 20000, trans_decision ep_re 262.6598663483952

{"global_step": 20000, "eval_re": [120.68484189233081, 114.98628974943867, 
157.0472065300326, 394.5138820290343, 129.3600606525017, 250.20158135808265, 
465.819230628722, 169.85443120817055, 401.5824977091713, 422.5486417264673], 
"eval_len": [23, 22, 30, 73, 25, 47, 87, 32, 73, 80]}

  3%|▎         | 29997/1000000 [25:50<12:13:25, 22.04it/s]global step 30000, trans_decision ep_re 155.06194720860736

{"global_step": 30000, "eval_re": [108.92611152627609, 141.30105806701837, 
128.94702886295744, 167.8360962827311, 135.00623593828186, 125.10235071281323, 
145.74959590087752, 305.4492750188964, 140.31336118990276, 151.98835858631875], 
"eval_len": [21, 27, 25, 32, 26, 24, 28, 59, 27, 29]}

  4%|▍         | 39999/1000000 [36:00<12:07:54, 21.98it/s]global step 40000, trans_decision ep_re 201.31312598694277

{"global_step": 40000, "eval_re": [155.7772764553216, 205.13889086530418, 
116.75517797790172, 103.00707138874438, 114.62390212327112, 124.23702929617949, 
158.44788540831243, 196.40613843923194, 502.0882509608967, 336.649636954264], 
"eval_len": [30, 39, 23, 20, 22, 24, 30, 37, 94, 61]}

  5%|▍         | 49999/1000000 [46:20<11:50:41, 22.28it/s]global step 50000, trans_decision ep_re 277.1445856256138

{"global_step": 50000, "eval_re": [390.0942738656818, 146.56132900537864, 
339.0812321613904, 125.8266358087609, 392.05127392187416, 327.2347584991236, 
365.9193258814578, 204.55344530858227, 129.89173685370181, 350.23184495018654], 
"eval_len": [74, 28, 61, 24, 71, 60, 71, 38, 25, 64]}

  6%|▌         | 59999/1000000 [56:20<11:59:41, 21.77it/s]global step 60000, trans_decision ep_re 162.8822571201054

{"global_step": 60000, "eval_re": [103.0903210189435, 163.2629396178802, 
139.51870324731445, 413.4241725136798, 139.8709104220652, 114.19142327986769, 
129.925461612226, 139.90980129757048, 150.92542947740577, 134.7034087141009], 
"eval_len": [20, 31, 27, 75, 27, 22, 25, 27, 29, 26]}

  7%|▋         | 69999/1000000 [1:06:50<11:50:50, 21.81it/s]global step 70000, trans_decision ep_re 210.8843053415839

{"global_step": 70000, "eval_re": [155.74381519827372, 108.92033006865513, 
103.53639088345531, 144.82951965588117, 109.23019675489978, 328.5135214864289, 
191.9370976021626, 203.63023252124196, 354.31822188628917, 408.1837273585515], 
"eval_len": [30, 21, 20, 28, 21, 61, 37, 38, 67, 75]}

  8%|▊         | 79998/1000000 [1:17:10<11:33:01, 22.13it/s]global step 80000, trans_decision ep_re 165.21553099455514

{"global_step": 80000, "eval_re": [114.88963650744127, 287.1479962263769, 
304.56531143988326, 124.53640389304147, 146.41713388995475, 141.68123216344708, 
122.9823732485613, 141.7961357762813, 159.14367814020136, 108.99540866036241], 
"eval_len": [22, 56, 59, 24, 28, 27, 24, 27, 31, 21]}

  9%|▉         | 89998/1000000 [1:27:30<11:28:14, 22.04it/s]global step 90000, trans_decision ep_re 192.69289393825903

{"global_step": 90000, "eval_re": [113.2444418831394, 171.45665912618784, 
175.9286306040763, 369.55223925420313, 130.0035388198717, 119.20881785417173, 
154.29387236760098, 149.86569913860302, 393.2481672348576, 150.12687309987876], 
"eval_len": [22, 33, 34, 67, 25, 23, 30, 29, 71, 29]}

 10%|▉         | 99997/1000000 [1:37:50<11:29:01, 21.77it/s]global step 100000, trans_decision ep_re 174.84715043086163

{"global_step": 100000, "eval_re": [120.37160748301558, 152.70180387667565, 
176.63295873252835, 119.79528177619133, 140.7116079881505, 151.24626932530128, 
414.5445869907665, 131.3440940298863, 188.6541647174811, 152.4691293886196], 
"eval_len": [23, 30, 34, 23, 27, 29, 76, 25, 36, 29]}

 11%|█         | 109998/1000000 [1:48:10<11:15:44, 21.95it/s]global step 110000, trans_decision ep_re 166.99361939046253

{"global_step": 110000, "eval_re": [156.5474221047967, 145.64046473219514, 
124.35331109315608, 161.06653841705986, 166.95408571925, 121.10395064717082, 
189.48234429146572, 156.03369545891786, 183.8556883798707, 264.89869306074235], 
"eval_len": [30, 28, 24, 31, 32, 23, 36, 30, 35, 52]}

 12%|█▏        | 119997/1000000 [1:58:30<11:07:03, 21.99it/s]global step 120000, trans_decision ep_re 223.78739775346327

{"global_step": 120000, "eval_re": [108.73700879751249, 124.96040814710331, 
127.75175760128894, 411.0552394205088, 319.4635684994072, 371.2457477070232, 
119.87739805012863, 140.5741084581566, 131.10734266906096, 383.1013981844426], 
"eval_len": [21, 24, 25, 76, 59, 68, 23, 27, 25, 69]}

 13%|█▎        | 129999/1000000 [2:08:50<11:02:21, 21.89it/s]global step 130000, trans_decision ep_re 161.70960522735405

{"global_step": 130000, "eval_re": [141.33396799775045, 125.03796597920162, 
144.83101143368208, 145.92548446224816, 102.16968722536478, 146.94351452331418, 
353.0134109404105, 114.02980439702444, 145.17997316913883, 198.63123214540536], 
"eval_len": [27, 24, 28, 28, 20, 28, 66, 22, 28, 38]}

 14%|█▍        | 139999/1000000 [2:19:10<11:03:18, 21.61it/s]global step 140000, trans_decision ep_re 176.40343240052707

{"global_step": 140000, "eval_re": [125.10902526676645, 114.53992591481985, 
366.82159671282955, 312.8893928402273, 134.58789518711802, 130.9255383588105, 
141.99091586028237, 144.7755832040775, 135.61537552376907, 156.77907513657033], 
"eval_len": [24, 22, 66, 61, 26, 25, 27, 28, 26, 30]}

 15%|█▍        | 149999/1000000 [2:29:30<10:45:47, 21.94it/s]global step 150000, trans_decision ep_re 213.20090448869806

{"global_step": 150000, "eval_re": [135.85894054900166, 103.52932948269192, 
129.7231197232465, 127.9157374662316, 328.7851987684893, 550.0044579065768, 
125.5704601516984, 360.9208090888285, 129.26365291403096, 140.43733883618484], 
"eval_len": [26, 20, 25, 25, 62, 115, 24, 68, 25, 27]}

 16%|█▌        | 159999/1000000 [2:39:42<10:40:57, 21.84it/s]global step 160000, trans_decision ep_re 187.2448377265991

{"global_step": 160000, "eval_re": [130.0210739819494, 326.07305449774674, 
177.80166010817678, 114.04363439794643, 141.4496030240459, 130.22017146803694, 
130.55764285176625, 434.77460770695143, 136.77244640894423, 150.7344828204268], 
"eval_len": [25, 61, 34, 22, 27, 25, 25, 81, 26, 29]}

 17%|█▋        | 169997/1000000 [2:50:20<10:38:14, 21.67it/s]global step 170000, trans_decision ep_re 183.64346830909736

{"global_step": 170000, "eval_re": [109.1024740928072, 155.03097878085245, 
129.89769649527497, 337.9294014827512, 130.8065883098578, 115.09599697538746, 
282.4965001415766, 236.3024574181926, 214.17294625904236, 125.59964313523096], 
"eval_len": [21, 30, 25, 63, 25, 22, 54, 44, 40, 24]}

 18%|█▊        | 179999/1000000 [3:00:40<10:22:45, 21.95it/s]global step 180000, trans_decision ep_re 210.9043595929764

{"global_step": 180000, "eval_re": [317.7959234605818, 130.42757006458118, 
159.21341747978616, 436.77981635896975, 166.40501076219286, 161.87236100478066, 
140.01919439134934, 142.0261495826369, 125.34073250848566, 329.1634203163996], 
"eval_len": [59, 25, 31, 80, 32, 31, 27, 27, 24, 63]}

 19%|█▉        | 189999/1000000 [3:11:00<10:21:09, 21.73it/s]global step 190000, trans_decision ep_re 274.052113951101

{"global_step": 190000, "eval_re": [424.1498989815409, 160.6507846273884, 
472.8025981547398, 166.21837873905014, 276.59193591862436, 161.94187225994918, 
454.9333840428429, 347.84693553495856, 129.43313073766456, 145.95222051425108], 
"eval_len": [79, 31, 86, 32, 55, 31, 85, 65, 25, 28]}

 20%|█▉        | 199999/1000000 [3:21:11<10:13:07, 21.75it/s]global step 200000, trans_decision ep_re 209.8868934631555

{"global_step": 200000, "eval_re": [114.63066842813434, 126.17933390269906, 
345.8497616035602, 358.8845430955324, 381.0062651369697, 144.76966946681628, 
145.1258305891434, 184.10455090033685, 169.16954448026675, 129.14876702809582], 
"eval_len": [22, 24, 65, 67, 71, 28, 28, 35, 32, 25]}

 21%|██        | 209998/1000000 [3:31:50<9:57:39, 22.03it/s]global step 210000, trans_decision ep_re 150.4342956702422

{"global_step": 210000, "eval_re": [140.7987783303053, 114.69756308168056, 
140.90188476147435, 97.29939653427063, 246.0007363980963, 130.65377170465862, 
210.175092634748, 131.0941961488949, 167.97414024156805, 124.74739686672552], 
"eval_len": [27, 22, 27, 19, 46, 25, 40, 25, 32, 24]}

 22%|██▏       | 219997/1000000 [3:42:10<9:53:05, 21.92it/s]global step 220000, trans_decision ep_re 251.86923293200098

{"global_step": 220000, "eval_re": [372.2250594158703, 141.79517300924525, 
124.8430731838085, 157.21109758169206, 126.30634789016753, 155.1436012689125, 
303.4426492875099, 120.26834644967431, 444.30282248297954, 573.15415875015], 
"eval_len": [68, 27, 24, 30, 24, 30, 58, 23, 81, 106]}

 23%|██▎       | 229999/1000000 [3:52:30<9:51:53, 21.68it/s]global step 230000, trans_decision ep_re 212.91887524503582

{"global_step": 230000, "eval_re": [107.96847673536132, 145.7042240106557, 
229.7771640948786, 130.70674038816705, 103.10270355396764, 126.6046237902388, 
433.0989563500766, 288.76382996278005, 142.15060096124685, 421.3114326029854], 
"eval_len": [21, 28, 43, 25, 20, 24, 79, 56, 27, 78]}

 24%|██▍       | 239999/1000000 [4:02:50<9:44:23, 21.68it/s]global step 240000, trans_decision ep_re 250.20910990738412

{"global_step": 240000, "eval_re": [387.772476593088, 120.15862332379774, 
120.14189888415582, 163.08861956714057, 372.3278335655614, 165.6325697763156, 
383.61289521860465, 273.8176761537248, 124.98052189513305, 390.55798409631905], 
"eval_len": [71, 23, 23, 31, 68, 32, 72, 52, 24, 71]}

 25%|██▍       | 249999/1000000 [4:13:10<9:22:40, 22.22it/s]global step 250000, trans_decision ep_re 228.40313612398342

{"global_step": 250000, "eval_re": [195.0604145730058, 120.43976106037357, 
164.86145620400652, 113.32731314994297, 176.1680403141349, 414.5554025695738, 
371.36241179937724, 382.1315514822476, 119.95353129591419, 226.17147879125756], 
"eval_len": [37, 23, 32, 22, 34, 77, 69, 70, 23, 43]}

 26%|██▌       | 259999/1000000 [4:23:30<9:18:59, 22.06it/s]global step 260000, trans_decision ep_re 192.45130002965098

{"global_step": 260000, "eval_re": [141.54310864328693, 129.4617548712579, 
225.39469801118548, 130.08453013967934, 131.1591674026499, 134.32731314940892, 
205.82437906588368, 396.09818520291225, 114.91752522994783, 315.7023385802977], 
"eval_len": [27, 25, 42, 25, 25, 26, 39, 73, 22, 58]}

 27%|██▋       | 269999/1000000 [4:33:50<9:17:35, 21.82it/s]global step 270000, trans_decision ep_re 274.9739196096521

{"global_step": 270000, "eval_re": [125.14185063889093, 485.39775070824015, 
134.6774911567788, 135.60453240718257, 114.26050038218672, 321.82411348044366, 
542.3688989993815, 182.7411174674198, 417.5698233012262, 290.1531175547707], 
"eval_len": [24, 91, 26, 26, 22, 59, 100, 35, 76, 53]}

 28%|██▊       | 279999/1000000 [4:44:10<9:05:46, 21.99it/s]global step 280000, trans_decision ep_re 247.85376811887733

{"global_step": 280000, "eval_re": [506.60213160653626, 114.44391350923188, 
131.3197144770632, 412.9267205048897, 166.54235647266313, 364.38887216340527, 
367.418730772017, 146.53381804930325, 108.9257507269696, 159.43567290669347], 
"eval_len": [95, 22, 25, 77, 32, 66, 67, 28, 21, 31]}

 29%|██▉       | 289997/1000000 [4:54:30<9:01:05, 21.87it/s]global step 290000, trans_decision ep_re 220.01074346210007

{"global_step": 290000, "eval_re": [339.0804352821422, 119.96789968592658, 
403.1318023005775, 140.6744304836362, 411.50567366979885, 143.617047994544, 
166.18921484541633, 141.52740240530568, 135.24562349558573, 199.1679044580675], 
"eval_len": [63, 23, 73, 27, 76, 28, 32, 27, 26, 38]}

 30%|██▉       | 299997/1000000 [5:04:50<8:57:03, 21.72it/s]global step 300000, trans_decision ep_re 263.0022367690761

{"global_step": 300000, "eval_re": [553.8009449297352, 322.3053065324329, 
413.30120827698676, 102.67026820952375, 139.88050577599222, 358.066498331458, 
302.20689995128856, 109.24178407199524, 181.5039339683788, 147.04501764296953], 
"eval_len": [105, 60, 76, 20, 27, 67, 54, 21, 35, 28]}

 31%|███       | 309997/1000000 [5:15:10<8:46:19, 21.85it/s]global step 310000, trans_decision ep_re 184.00492196133897

{"global_step": 310000, "eval_re": [125.09468024973347, 125.39166801293807, 
343.3177598606786, 134.8565291239527, 135.22068085294939, 114.47696359500259, 
437.0976428590457, 155.88516406100632, 108.82506837940512, 159.88306261867743], 
"eval_len": [24, 24, 64, 26, 26, 22, 78, 30, 21, 31]}

 32%|███▏      | 319998/1000000 [5:25:30<8:31:18, 22.17it/s]global step 320000, trans_decision ep_re 223.08874956867112

{"global_step": 320000, "eval_re": [135.73323228627055, 316.07207399517443, 
339.5285781451246, 167.08415553521385, 125.49512456152797, 324.2179856160894, 
342.1786176709946, 190.16522667767762, 139.96398371567352, 150.44851748296483], 
"eval_len": [26, 59, 63, 32, 24, 60, 62, 36, 27, 29]}

 33%|███▎      | 329998/1000000 [5:35:50<8:28:54, 21.94it/s]global step 330000, trans_decision ep_re 176.57821319576493

{"global_step": 330000, "eval_re": [307.18174437896926, 131.26216175321238, 
136.13740491400597, 120.53156768138842, 140.84608665511823, 119.99777758186642, 
136.12287898319406, 119.96315792489452, 423.67815928705, 130.06119279795], 
"eval_len": [57, 25, 26, 23, 27, 23, 26, 23, 76, 25]}

 34%|███▍      | 339998/1000000 [5:46:10<8:14:52, 22.23it/s]global step 340000, trans_decision ep_re 294.0270492905376

{"global_step": 340000, "eval_re": [417.72611442583343, 402.6737820648063, 
462.9213594727828, 184.8176883911147, 152.49740196639286, 189.33522823954814, 
401.91986412100425, 155.80949960949116, 436.8438917485004, 135.72566286590185], 
"eval_len": [80, 74, 85, 35, 29, 36, 76, 30, 77, 26]}

 35%|███▍      | 349998/1000000 [5:56:30<8:07:46, 22.21it/s]global step 350000, trans_decision ep_re 248.06801362849737

{"global_step": 350000, "eval_re": [451.54785606912105, 162.5668813699571, 
103.13602890116424, 419.9742261148693, 178.75515765744356, 443.2318441226061, 
144.5570543073975, 130.03651765615746, 130.86030902880694, 316.0142610574507], 
"eval_len": [84, 31, 20, 77, 34, 80, 28, 25, 25, 59]}

 36%|███▌      | 359998/1000000 [6:06:50<8:04:04, 22.03it/s]global step 360000, trans_decision ep_re 278.5706331606658

{"global_step": 360000, "eval_re": [439.70668404729923, 141.3960061557333, 
118.83540021416634, 207.2177228718513, 114.22859856739584, 875.0551849805993, 
141.02317362649444, 444.9072572503377, 130.95581150471227, 172.38049238806815], 
"eval_len": [81, 27, 23, 39, 22, 169, 27, 83, 25, 33]}

 37%|███▋      | 369998/1000000 [6:17:10<7:56:07, 22.05it/s]global step 370000, trans_decision ep_re 218.83888011279888

{"global_step": 370000, "eval_re": [157.54722800535075, 317.7426125433119, 
361.59429492133114, 253.74655913547846, 108.97866967997214, 141.7610292137526, 
156.73793777691435, 306.49474355222554, 274.3799753624499, 109.40575093720192], 
"eval_len": [30, 58, 67, 49, 21, 27, 30, 57, 51, 21]}

 38%|███▊      | 379998/1000000 [6:27:30<7:46:50, 22.14it/s]global step 380000, trans_decision ep_re 232.84095963731116

{"global_step": 380000, "eval_re": [126.1906128611305, 198.1105341608259, 
119.50538584476685, 102.66608502706917, 409.69006019218114, 123.3142473107392, 
151.88622663443323, 471.68648161071263, 252.94336529534337, 372.41659743590986],
"eval_len": [24, 37, 23, 20, 74, 24, 29, 87, 50, 68]}

 39%|███▉      | 389998/1000000 [6:37:50<7:41:35, 22.03it/s]global step 390000, trans_decision ep_re 262.5462875465583

{"global_step": 390000, "eval_re": [118.871640285686, 374.30892514883476, 
451.9602866700793, 360.5875706388637, 156.93140831930015, 129.43252563729834, 
373.7848648690288, 118.43837219673718, 400.50281735654227, 140.644464343212], 
"eval_len": [23, 68, 83, 65, 30, 25, 66, 23, 75, 27]}

 40%|███▉      | 399998/1000000 [6:48:10<7:33:00, 22.07it/s]global step 400000, trans_decision ep_re 233.31128282735668

{"global_step": 400000, "eval_re": [130.83242458880636, 364.6308017736919, 
469.485861005422, 380.37480435423186, 120.23475454305476, 145.46698734169695, 
134.97525596256315, 335.8526471882665, 143.251181061994, 108.00811045383938], 
"eval_len": [25, 69, 86, 73, 23, 28, 26, 61, 28, 21]}

 41%|████      | 409998/1000000 [6:58:30<7:25:57, 22.05it/s]global step 410000, trans_decision ep_re 169.81810110013095

{"global_step": 410000, "eval_re": [129.60341380685682, 212.1584574617102, 
167.04005095792195, 145.0774296180664, 164.94530299861674, 134.981396503184, 
365.2087725168093, 109.16947718666414, 139.9788131407877, 130.01789681069226], 
"eval_len": [25, 40, 32, 28, 32, 26, 67, 21, 27, 25]}

 42%|████▏     | 419998/1000000 [7:08:50<7:20:33, 21.94it/s]global step 420000, trans_decision ep_re 281.55894858715186

{"global_step": 420000, "eval_re": [424.94310379479026, 335.25773466992797, 
130.67313288052566, 425.04793867041997, 134.82430939001847, 140.1057563157851, 
287.95602204087515, 349.86231890664857, 146.73494362487463, 440.18422557765297],
"eval_len": [78, 63, 25, 80, 26, 27, 53, 64, 28, 80]}

 43%|████▎     | 429998/1000000 [7:19:10<7:12:43, 21.95it/s]global step 430000, trans_decision ep_re 202.41996699008402

{"global_step": 430000, "eval_re": [136.33541810497613, 362.2040460148976, 
136.20191212807967, 120.30507047521543, 120.64976582825403, 410.162006775116, 
291.104161086125, 153.9873637334595, 120.76557567394882, 172.4843500807679], 
"eval_len": [26, 67, 26, 23, 23, 76, 55, 30, 23, 33]}

 44%|████▍     | 439998/1000000 [7:29:30<7:03:17, 22.05it/s]global step 440000, trans_decision ep_re 252.71486606996717

{"global_step": 440000, "eval_re": [346.6224557542459, 174.16784085627174, 
151.43607998942076, 361.0009427287307, 119.53611053245892, 108.61341726993666, 
436.57732802665885, 341.5342902996636, 156.14003199740645, 331.520163244878], 
"eval_len": [64, 33, 29, 67, 23, 21, 79, 63, 30, 62]}

 45%|████▍     | 449998/1000000 [7:39:50<6:57:24, 21.96it/s]global step 450000, trans_decision ep_re 250.70177591723967

{"global_step": 450000, "eval_re": [416.80526444143226, 136.64677463904138, 
144.72515266886296, 528.1903793441095, 145.97601343993205, 374.43952719290604, 
299.92139755902923, 189.07236605150905, 124.65086198474599, 146.59002185082807],
"eval_len": [83, 26, 28, 107, 28, 70, 55, 36, 24, 28]}

 46%|████▌     | 459998/1000000 [7:50:10<6:48:06, 22.05it/s]global step 460000, trans_decision ep_re 256.8309481718084

{"global_step": 460000, "eval_re": [120.12221256794068, 310.8596821009727, 
482.15280828609036, 403.2148657858577, 167.75799157418356, 155.40961909971534, 
141.906193131773, 232.1528893188905, 130.87338175668262, 423.85983809597786], 
"eval_len": [23, 58, 88, 73, 32, 30, 27, 44, 25, 76]}

 47%|████▋     | 469998/1000000 [8:00:12<6:36:42, 22.27it/s]global step 470000, trans_decision ep_re 141.65525137602535

{"global_step": 470000, "eval_re": [168.37009377143207, 114.55363199384853, 
150.26821594211071, 135.5771971617721, 159.81804547311907, 135.2822051098499, 
130.95397223774648, 150.98668727662744, 140.8612059356644, 129.8812588580828], 
"eval_len": [32, 22, 29, 26, 31, 26, 25, 29, 27, 25]}

 48%|████▊     | 479999/1000000 [8:10:31<6:38:42, 21.74it/s]global step 480000, trans_decision ep_re 312.2639585774138

{"global_step": 480000, "eval_re": [103.2532243822487, 151.51680199163684, 
465.90597483327053, 388.3545700439353, 454.930455845652, 229.9798401047583, 
124.75374796578664, 531.6640149327271, 305.57549199192914, 366.70546368219385], 
"eval_len": [20, 29, 89, 70, 83, 43, 24, 99, 58, 66]}

 49%|████▉     | 489999/1000000 [8:20:51<6:30:53, 21.75it/s]global step 490000, trans_decision ep_re 271.57350142576945

{"global_step": 490000, "eval_re": [113.20483211669246, 167.6915637096818, 
393.06156189689193, 357.28939193431023, 184.72766833095352, 237.6803457679855, 
374.2716037600374, 167.40406035298165, 406.670793149009, 313.7331932391511], 
"eval_len": [22, 32, 73, 66, 35, 44, 72, 32, 76, 59]}

 50%|████▉     | 499999/1000000 [8:31:11<6:21:41, 21.83it/s]global step 500000, trans_decision ep_re 224.6342182007058

{"global_step": 500000, "eval_re": [216.58541304036524, 168.74560435844418, 
130.98645638017732, 129.83920807537405, 322.6714506921124, 375.8860839339116, 
145.5592612983362, 243.0472890612555, 125.805729590497, 387.2156855765841], 
"eval_len": [41, 32, 25, 25, 60, 68, 28, 45, 24, 72]}

 51%|█████     | 509999/1000000 [8:41:31<6:12:37, 21.92it/s]global step 510000, trans_decision ep_re 215.601262264447

{"global_step": 510000, "eval_re": [135.64875938852367, 491.4370727091234, 
159.47370181590247, 271.5185481522982, 183.08671590150163, 128.17104905876874, 
141.17927198234884, 336.1480519748032, 125.45392858551746, 183.89552307568232], 
"eval_len": [26, 94, 31, 53, 35, 25, 27, 61, 24, 35]}

 52%|█████▏    | 519998/1000000 [8:52:10<6:04:05, 21.97it/s]global step 520000, trans_decision ep_re 262.3747396176033

{"global_step": 520000, "eval_re": [359.8072955930514, 103.13926823850984, 
443.120590773546, 124.07488592835108, 393.5071185904522, 120.38898769120968, 
166.74351076769273, 137.0863276806885, 666.7689323186779, 109.11047859385435], 
"eval_len": [69, 20, 83, 24, 74, 23, 32, 26, 145, 21]}

 53%|█████▎    | 529997/1000000 [9:02:30<5:59:38, 21.78it/s]global step 530000, trans_decision ep_re 198.45717459819036

{"global_step": 530000, "eval_re": [138.7736135543669, 359.8070211746839, 
149.3949738935507, 354.5335251580645, 168.10661067291062, 167.04549217442596, 
114.9339684179869, 215.99704133177522, 180.82706509112714, 135.15243451301174], 
"eval_len": [27, 66, 29, 65, 32, 32, 22, 40, 34, 26]}

 54%|█████▍    | 539999/1000000 [9:12:50<5:49:28, 21.94it/s]global step 540000, trans_decision ep_re 215.93522143599247

{"global_step": 540000, "eval_re": [160.99035918875572, 157.23278566262232, 
124.64922944886384, 161.79124873817773, 141.66129345861611, 428.6488973997482, 
141.38289674270246, 114.17960544245412, 423.5489803328457, 305.26691794513835], 
"eval_len": [31, 30, 24, 31, 27, 77, 27, 22, 80, 55]}

 55%|█████▍    | 549999/1000000 [9:23:10<5:43:10, 21.85it/s]global step 550000, trans_decision ep_re 263.35128618794874

{"global_step": 550000, "eval_re": [249.62129363397474, 162.69365715360715, 
399.8953654054692, 263.54053918299473, 339.69689574150254, 130.42785187527934, 
129.0736346956557, 434.0779155605399, 366.9897037928055, 157.49600483765872], 
"eval_len": [47, 31, 75, 51, 64, 25, 25, 81, 66, 30]}

 56%|█████▌    | 559999/1000000 [9:33:22<5:36:00, 21.82it/s]global step 560000, trans_decision ep_re 168.67513029385867

{"global_step": 560000, "eval_re": [184.7094676012509, 109.39749828952577, 
120.15579869613138, 129.42588178725165, 167.25181969378585, 162.58105410360864, 
152.79483737519112, 438.08300467988477, 114.07238159850658, 108.27955911344978],
"eval_len": [35, 21, 23, 25, 32, 31, 29, 82, 22, 21]}

 57%|█████▋    | 569999/1000000 [9:43:41<5:28:40, 21.81it/s]global step 570000, trans_decision ep_re 301.7068354489378

{"global_step": 570000, "eval_re": [366.30726043291367, 114.0569961110262, 
190.08334831756375, 392.1931578023672, 184.67606062492206, 124.90917021053346, 
505.90168478744096, 437.120964582414, 318.72821803619127, 383.0914935840062], 
"eval_len": [67, 22, 36, 70, 35, 24, 99, 81, 59, 69]}

 58%|█████▊    | 579999/1000000 [9:54:10<5:18:16, 21.99it/s]global step 580000, trans_decision ep_re 296.6187750936406

{"global_step": 580000, "eval_re": [316.95333147962896, 151.02100646677764, 
140.2869392495345, 141.5537628593258, 379.26717561550976, 587.653709348344, 
333.51552604564864, 374.8171737463604, 437.9526571330683, 103.16646899220748], 
"eval_len": [59, 29, 27, 27, 71, 113, 61, 68, 83, 20]}

 59%|█████▉    | 589999/1000000 [10:04:30<5:11:25, 21.94it/s]global step 590000, trans_decision ep_re 276.75431531484753

{"global_step": 590000, "eval_re": [486.4901616531407, 146.0512660919769, 
167.6931926912015, 367.40833654426194, 431.75602631477943, 168.79100752605936, 
158.79280027928868, 130.6619227586599, 221.5436166260852, 488.35482266302137], 
"eval_len": [88, 28, 32, 67, 77, 32, 31, 25, 42, 88]}

 60%|█████▉    | 599999/1000000 [10:14:50<5:03:19, 21.98it/s]global step 600000, trans_decision ep_re 202.45188108548282

{"global_step": 600000, "eval_re": [135.04044862000418, 344.6056584281333, 
346.26650324493613, 131.22259073660194, 315.4069783901133, 151.84150320121324, 
155.61945496971836, 158.36607693876616, 150.18479013502744, 135.9648061903139], 
"eval_len": [26, 62, 64, 25, 58, 29, 30, 30, 29, 26]}

 61%|██████    | 609999/1000000 [10:25:10<4:58:34, 21.77it/s]global step 610000, trans_decision ep_re 165.74775468249027

{"global_step": 610000, "eval_re": [212.59354523186374, 156.47963950060836, 
138.70930532147327, 112.93274371209003, 173.40177813116605, 291.51884100104877, 
114.33099955253765, 145.0698166445516, 156.72908580024048, 155.71179192932277], 
"eval_len": [40, 30, 27, 22, 33, 55, 22, 28, 30, 30]}

 62%|██████▏   | 619997/1000000 [10:35:30<4:50:49, 21.78it/s]global step 620000, trans_decision ep_re 264.50004158113234

{"global_step": 620000, "eval_re": [363.94113250552454, 133.68831941739097, 
152.0902908838091, 448.2988479323517, 601.8424387440555, 114.68450207974857, 
397.1730168449997, 135.47611798425893, 163.10431853018855, 134.70143088899576], 
"eval_len": [67, 26, 29, 84, 107, 22, 71, 26, 31, 26]}

 63%|██████▎   | 629997/1000000 [10:45:50<4:42:40, 21.82it/s]global step 630000, trans_decision ep_re 232.49376230041327

{"global_step": 630000, "eval_re": [157.62024857226933, 354.71270478864517, 
334.4645657870192, 158.20581420588445, 154.59498829773702, 157.50976590105222, 
151.0604145326306, 325.1117712189218, 129.95095148593137, 401.7063982140414], 
"eval_len": [30, 66, 62, 30, 30, 30, 29, 61, 25, 73]}

 64%|██████▍   | 639999/1000000 [10:56:10<4:32:50, 21.99it/s]global step 640000, trans_decision ep_re 232.26933982357195

{"global_step": 640000, "eval_re": [125.0985968857735, 411.11815063547624, 
344.93321290875866, 119.91474799001044, 198.58254270409068, 325.0033194079294, 
140.3185132563731, 103.08674934817326, 124.83784432438998, 429.799720774744], 
"eval_len": [24, 76, 63, 23, 38, 60, 27, 20, 24, 78]}

 65%|██████▍   | 649997/1000000 [11:06:30<4:28:31, 21.72it/s]global step 650000, trans_decision ep_re 233.95701315300795

{"global_step": 650000, "eval_re": [145.16616577178712, 392.54442300394305, 
414.75784050107893, 165.29927804322298, 391.9305419041757, 109.5050072417748, 
114.52877015154388, 145.1694920347603, 340.49166442934853, 120.17694844844412], 
"eval_len": [28, 75, 75, 32, 72, 21, 22, 28, 62, 23]}

 66%|██████▌   | 659997/1000000 [11:16:50<4:19:49, 21.81it/s]global step 660000, trans_decision ep_re 239.638296719936

{"global_step": 660000, "eval_re": [537.4449508820107, 483.37326151232367, 
157.8534171718321, 124.2145488423838, 163.24722248852822, 125.54015579686026, 
114.86083440463284, 419.63091596097945, 129.0603506283543, 141.15730951145468], 
"eval_len": [93, 86, 30, 24, 31, 24, 22, 77, 25, 27]}

 67%|██████▋   | 669999/1000000 [11:27:10<4:12:09, 21.81it/s]global step 670000, trans_decision ep_re 180.62214715359804

{"global_step": 670000, "eval_re": [109.08888261380136, 209.66406170338212, 
142.3428526804262, 297.220986439565, 135.4217901005884, 360.55041749132346, 
140.10464411160973, 144.1771349694217, 131.04338319889322, 136.6073182269691], 
"eval_len": [21, 40, 27, 58, 26, 67, 27, 28, 25, 26]}

 68%|██████▊   | 679997/1000000 [11:37:30<4:04:13, 21.84it/s]global step 680000, trans_decision ep_re 265.24226226080515

{"global_step": 680000, "eval_re": [162.43922770301504, 525.3594185672919, 
185.57121843469258, 439.16808919365866, 129.4312256989171, 230.52833836206116, 
97.33679475901563, 134.94883306220993, 436.5249424382501, 311.1145343889393], 
"eval_len": [31, 91, 35, 81, 25, 44, 19, 26, 80, 58]}

 69%|██████▉   | 689997/1000000 [11:47:50<3:56:56, 21.81it/s]global step 690000, trans_decision ep_re 258.2811113100818

{"global_step": 690000, "eval_re": [227.83028057326007, 321.5245381870347, 
467.1053104690124, 431.83738654781627, 185.20428050744843, 136.04238648348363, 
160.2928268015797, 108.14430659899307, 119.63978717008378, 425.1900097621059], 
"eval_len": [43, 59, 86, 79, 35, 26, 31, 21, 23, 78]}

 70%|██████▉   | 699997/1000000 [11:58:10<3:49:39, 21.77it/s]global step 700000, trans_decision ep_re 149.67582240103198

{"global_step": 700000, "eval_re": [142.32958207189012, 130.54208997781694, 
200.58137821763577, 146.55393685445063, 119.32372157462143, 130.06482670456316, 
164.5352861530883, 147.33880607918198, 146.7332173865372, 168.7553789905339], 
"eval_len": [27, 25, 38, 28, 23, 25, 31, 28, 28, 32]}

 71%|███████   | 709998/1000000 [12:08:30<3:37:03, 22.27it/s]global step 710000, trans_decision ep_re 300.28140863018467

{"global_step": 710000, "eval_re": [114.50552999153476, 346.46664956448046, 
124.29720721383296, 354.48782634644294, 150.59135349512104, 141.24769077381197, 
518.7560983570968, 530.5537811711218, 338.3259888824021, 383.5819605060019], 
"eval_len": [22, 64, 24, 65, 29, 27, 91, 94, 65, 70]}

 72%|███████▏  | 719998/1000000 [12:18:50<3:32:28, 21.96it/s]global step 720000, trans_decision ep_re 238.86319280030042

{"global_step": 720000, "eval_re": [436.2206873107139, 124.74172612293621, 
131.3556079554119, 447.8073178377319, 129.7452581168994, 202.13406677286122, 
501.2464451662444, 140.1649435338399, 108.82597208776104, 166.38990309860432], 
"eval_len": [83, 24, 25, 83, 25, 39, 93, 27, 21, 32]}

 73%|███████▎  | 729998/1000000 [12:29:10<3:24:36, 21.99it/s]global step 730000, trans_decision ep_re 245.7823239467179

{"global_step": 730000, "eval_re": [167.4699884322554, 548.0233580818253, 
136.1084198063984, 124.5297238082858, 173.16191750237897, 461.1352023997086, 
124.84449666204897, 390.15689960049264, 131.02278943385235, 201.37044373993257],
"eval_len": [32, 97, 26, 24, 33, 81, 24, 73, 25, 39]}

 74%|███████▍  | 739998/1000000 [12:39:30<3:16:44, 22.03it/s]global step 740000, trans_decision ep_re 251.51223439785713

{"global_step": 740000, "eval_re": [360.795342651486, 160.5057332576166, 
155.51962408801296, 322.35321262023376, 135.4830001840846, 151.2353235164754, 
164.34420540748383, 401.3114473137097, 141.30559183759692, 522.2688631018717], 
"eval_len": [65, 31, 30, 61, 26, 29, 31, 72, 27, 102]}

 75%|███████▍  | 749998/1000000 [12:49:50<3:08:45, 22.08it/s]global step 750000, trans_decision ep_re 192.9016095458448

{"global_step": 750000, "eval_re": [114.54879866026674, 146.46096498562716, 
291.42955018626833, 151.2578005030127, 137.6412560878589, 370.5640263661404, 
200.04357791144432, 220.02174464082282, 171.47125233453855, 125.57712378246788],
"eval_len": [22, 28, 57, 29, 26, 68, 38, 42, 33, 24]}

 76%|███████▌  | 759998/1000000 [13:00:10<3:02:32, 21.91it/s]global step 760000, trans_decision ep_re 188.2516683345853

{"global_step": 760000, "eval_re": [108.75965939825016, 151.6103838576199, 
150.50899178551938, 130.26113319107722, 406.40300759502986, 129.61844137066825, 
395.3363038514587, 141.45323177629908, 134.9235102446895, 133.6420202752409], 
"eval_len": [21, 29, 29, 25, 76, 25, 73, 27, 26, 26]}

 77%|███████▋  | 769998/1000000 [13:10:30<2:54:04, 22.02it/s]global step 770000, trans_decision ep_re 268.5412256494007

{"global_step": 770000, "eval_re": [136.8987374694958, 447.70455295145746, 
156.42272325438776, 415.4386979352222, 140.3955592680963, 108.75248861373835, 
300.85588018095274, 388.06608700237643, 149.96201029157217, 440.9155195267077], 
"eval_len": [26, 80, 30, 75, 27, 21, 55, 71, 29, 83]}

 78%|███████▊  | 779998/1000000 [13:20:50<2:46:08, 22.07it/s]global step 780000, trans_decision ep_re 211.23532361673838

{"global_step": 780000, "eval_re": [427.18202485165097, 147.17877305466197, 
141.67335927374629, 398.28417727978797, 113.76578549984336, 118.93717714215562, 
144.1920220147076, 242.99184584337496, 206.95118948710365, 171.19688172035143], 
"eval_len": [81, 28, 27, 76, 22, 23, 28, 47, 39, 33]}

 79%|███████▉  | 789997/1000000 [13:31:10<2:40:33, 21.80it/s]global step 790000, trans_decision ep_re 244.14221062641613

{"global_step": 790000, "eval_re": [108.4915959255116, 173.6374352077446, 
387.8562858702597, 190.91408233261848, 160.6821950566852, 386.20368884528114, 
243.21162770084317, 113.74786341014583, 531.149885437708, 145.5274464773632], 
"eval_len": [21, 33, 72, 36, 31, 70, 46, 22, 96, 28]}

 80%|███████▉  | 799997/1000000 [13:41:30<2:31:08, 22.06it/s]global step 800000, trans_decision ep_re 210.75656252239713

{"global_step": 800000, "eval_re": [238.96815200879027, 264.7279115009777, 
370.0265279238537, 141.1795981877538, 202.00025835766434, 113.7039002009927, 
134.1195205612882, 145.65695584072677, 361.1090079858578, 136.0737926560663], 
"eval_len": [45, 51, 72, 27, 38, 22, 26, 28, 67, 26]}

 81%|████████  | 809997/1000000 [13:51:50<2:24:14, 21.96it/s]global step 810000, trans_decision ep_re 170.64202989563117

{"global_step": 810000, "eval_re": [145.56132849801335, 362.98104135113283, 
130.43325610506608, 178.44761445626173, 178.75074174971334, 201.21438101830444, 
108.24892160501376, 146.2619460467432, 140.14128562521995, 114.37978250084305], 
"eval_len": [28, 69, 25, 34, 34, 39, 21, 28, 27, 22]}

 82%|████████▏ | 819997/1000000 [14:02:10<2:17:22, 21.84it/s]global step 820000, trans_decision ep_re 214.16247857408547

{"global_step": 820000, "eval_re": [109.65013446254503, 279.2622952265058, 
130.33564157327785, 124.99766645639905, 685.8856537835072, 168.06961541107302, 
200.24243944934258, 183.3077365514171, 130.31288141885193, 129.56072140793526], 
"eval_len": [21, 52, 25, 24, 146, 33, 38, 35, 25, 25]}

 83%|████████▎ | 829997/1000000 [14:12:30<2:09:49, 21.82it/s]global step 830000, trans_decision ep_re 229.11972857197642

{"global_step": 830000, "eval_re": [490.2138402642409, 141.21558554535534, 
249.00748379487607, 183.5374542832105, 168.9425026639767, 168.6604385073583, 
114.10192955840519, 168.9854601504118, 438.3418508056857, 168.1907401462436], 
"eval_len": [91, 27, 47, 35, 32, 32, 22, 32, 82, 32]}

 84%|████████▍ | 839999/1000000 [14:22:50<2:01:34, 21.94it/s]global step 840000, trans_decision ep_re 261.35848764972707

{"global_step": 840000, "eval_re": [619.5778411866171, 125.27198449407283, 
135.71787528707108, 130.10977575024535, 177.80127355592452, 119.37188247480121, 
151.23688445256073, 135.8112141722616, 486.1629983866491, 532.523146737067], 
"eval_len": [118, 24, 26, 25, 34, 23, 29, 26, 90, 96]}

 85%|████████▍ | 849999/1000000 [14:33:10<1:56:22, 21.48it/s]global step 850000, trans_decision ep_re 170.49991559251112

{"global_step": 850000, "eval_re": [124.53065671206133, 125.54519109134954, 
149.3905609007916, 207.89106011411818, 316.53981809257675, 215.14649253387125, 
145.00534080334728, 139.83041763450638, 161.65063392874524, 119.46898411374384],
"eval_len": [24, 24, 29, 40, 59, 41, 28, 27, 31, 23]}

 86%|████████▌ | 859999/1000000 [14:43:31<1:47:01, 21.80it/s]global step 860000, trans_decision ep_re 195.5506982649203

{"global_step": 860000, "eval_re": [152.15236348304745, 212.6618913472535, 
145.9076074057974, 154.3482907520955, 124.82097977918654, 124.93827631713947, 
483.4335599293178, 281.37477455069404, 109.10246606923828, 166.76677301543302], 
"eval_len": [29, 41, 28, 30, 24, 24, 92, 53, 21, 32]}

 87%|████████▋ | 869999/1000000 [14:53:51<1:39:48, 21.71it/s]global step 870000, trans_decision ep_re 134.22426149405737

{"global_step": 870000, "eval_re": [178.4651377716326, 123.87003416135086, 
108.78900375826917, 124.73800730985698, 124.99194484220394, 114.98265309197798, 
156.65925858039543, 108.21667016603317, 188.04257827648502, 113.48732698236827],
"eval_len": [34, 24, 21, 24, 24, 22, 30, 21, 36, 22]}

 88%|████████▊ | 879999/1000000 [15:04:11<1:30:53, 22.00it/s]global step 880000, trans_decision ep_re 243.46605161444282

{"global_step": 880000, "eval_re": [120.3417971187609, 187.67125677736004, 
362.54777438076695, 135.73004943047854, 114.24351806064003, 614.4939938451861, 
447.6084796361076, 125.23429026286362, 124.47215363855734, 202.31720299370696], 
"eval_len": [23, 36, 67, 26, 22, 116, 79, 24, 24, 38]}

 89%|████████▉ | 889999/1000000 [15:14:31<1:24:35, 21.67it/s]global step 890000, trans_decision ep_re 208.39889436819098

{"global_step": 890000, "eval_re": [558.9518240192979, 103.23692390751368, 
135.96923321330286, 156.14339365422333, 189.29293499825977, 130.70854372030905, 
431.23640060096983, 146.46346030556418, 118.13005597410377, 113.85617328836557],
"eval_len": [102, 20, 26, 30, 36, 25, 89, 28, 23, 22]}

 90%|████████▉ | 899998/1000000 [15:25:01<1:15:53, 21.96it/s]global step 900000, trans_decision ep_re 168.53196508582738

{"global_step": 900000, "eval_re": [114.36651578494862, 373.97959898186645, 
108.7766609063076, 176.48381775643887, 182.71332248208603, 196.81702078750297, 
130.67200038605708, 152.1322642782371, 146.29598291944617, 103.08246657538301], 
"eval_len": [22, 71, 21, 34, 35, 37, 25, 29, 28, 20]}

 91%|█████████ | 909999/1000000 [15:35:21<1:08:30, 21.90it/s]global step 910000, trans_decision ep_re 201.86601775934213

{"global_step": 910000, "eval_re": [211.69022684083848, 141.12943938518438, 
135.1225612381158, 114.74397595525848, 139.25112626901776, 157.75474530841032, 
313.48055122419305, 124.65730422357105, 551.5811355050234, 129.24911164380833], 
"eval_len": [40, 27, 26, 22, 27, 30, 61, 24, 97, 25]}

 92%|█████████▏| 919998/1000000 [15:45:51<1:00:36, 22.00it/s]global step 920000, trans_decision ep_re 242.54617807949347

{"global_step": 920000, "eval_re": [166.17772773257573, 453.76302563114245, 
135.86069566625153, 513.1831633885022, 114.52287823177315, 167.64175226551515, 
278.7121842537356, 168.0673798037168, 296.99497676015017, 130.53799706157216], 
"eval_len": [32, 97, 26, 94, 22, 32, 54, 32, 57, 25]}

 93%|█████████▎| 929999/1000000 [15:56:11<53:59, 21.61it/s]global step 930000, trans_decision ep_re 206.77778687823843

{"global_step": 930000, "eval_re": [315.7457475550641, 151.38550319450601, 
170.09284621174297, 472.1356954936546, 177.37048140451907, 212.8843804246266, 
114.89110892938922, 118.98468463527331, 130.86211191557527, 203.42530901803306],
"eval_len": [61, 29, 33, 82, 34, 41, 22, 23, 25, 38]}

 94%|█████████▍| 939999/1000000 [16:06:21<45:52, 21.80it/s]global step 940000, trans_decision ep_re 144.9341752002029

{"global_step": 940000, "eval_re": [103.37441624170332, 254.49567544392102, 
140.3448789398516, 125.91454414948167, 120.26366815494607, 163.92369117064527, 
129.663520124761, 108.8669904573187, 162.603480708776, 139.89088661062436], 
"eval_len": [20, 49, 27, 24, 23, 31, 25, 21, 31, 27]}

 95%|█████████▍| 949998/1000000 [16:17:01<38:31, 21.63it/s]global step 950000, trans_decision ep_re 213.44804715617798

{"global_step": 950000, "eval_re": [139.40164532937206, 248.37301077533118, 
120.48256848206205, 115.00636589769375, 142.06300333971558, 369.6106960699549, 
173.57716455750344, 514.0375524984602, 149.93958707662037, 161.98887753506608], 
"eval_len": [27, 47, 23, 22, 27, 69, 33, 96, 29, 31]}

 96%|█████████▌| 959999/1000000 [16:27:21<30:52, 21.60it/s]global step 960000, trans_decision ep_re 150.09691302869496

{"global_step": 960000, "eval_re": [165.1192252525106, 120.00780797635676, 
125.41233282845684, 120.67619028348985, 107.72823284827963, 177.71876989219726, 
114.42129334262378, 240.1425281477676, 103.60427309140724, 226.13847662385987], 
"eval_len": [32, 23, 24, 23, 21, 34, 22, 45, 20, 43]}

 97%|█████████▋| 969998/1000000 [16:37:51<22:41, 22.04it/s]global step 970000, trans_decision ep_re 206.91458894057664

{"global_step": 970000, "eval_re": [172.7963110009779, 125.27234427242742, 
151.56413169246198, 146.2955678182211, 400.1844084886132, 210.14170282620967, 
367.67128327435034, 145.11484539361996, 141.9306474503107, 208.1746471885742], 
"eval_len": [33, 24, 29, 28, 71, 40, 69, 28, 27, 40]}

 98%|█████████▊| 979999/1000000 [16:48:11<15:19, 21.74it/s]global step 980000, trans_decision ep_re 152.56871802976917

{"global_step": 980000, "eval_re": [167.6730488472196, 108.29683704132108, 
206.97304155042013, 120.1788693847603, 108.62322330592951, 225.91260140670983, 
151.3855987925368, 113.09965015005304, 209.67284747887058, 113.87146233987067], 
"eval_len": [32, 21, 39, 23, 21, 43, 29, 22, 40, 22]}

 99%|█████████▉| 989999/1000000 [16:58:21<07:40, 21.74it/s]global step 990000, trans_decision ep_re 236.93007107213376

{"global_step": 990000, "eval_re": [153.47777034739516, 486.9638330451221, 
307.5237860312701, 125.30884115388892, 155.1423493495694, 541.9000658877363, 
155.43639670447746, 124.47381047183703, 210.35837756825364, 108.7154801617878], 
"eval_len": [30, 93, 58, 24, 30, 102, 30, 24, 40, 21]}

100%|█████████▉| 999997/1000000 [17:09:01<00:00, 21.67it/s]global step 1000000, trans_decision ep_re 229.75800996323397

{"global_step": 1000000, "eval_re": [156.35335145901135, 140.82920545426066, 
125.67780907763736, 394.15670066029855, 310.09886592580733, 543.4890027148947, 
268.2362392628928, 109.25941891918103, 114.52940760251303, 134.95009855584289], 
"eval_len": [30, 27, 24, 73, 60, 94, 52, 21, 22, 26]}

100%|██████████| 1000000/1000000 [17:09:04<00:00, 16.20it/s]
