
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.25
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9998/1000000 [03:53<9:29:13, 28.99it/s]global step 10000, trans_decision ep_re 305.6985301770287

{"global_step": 10000, "eval_re": [479.79178218033553, 373.5563217624746, 
112.30654934724478, 238.43928212642248, 591.4124969262807, 162.71163255159593, 
140.15506119628805, 280.00914675759367, 363.645527532678, 314.957501389373], 
"eval_len": [89, 70, 22, 45, 116, 31, 27, 54, 68, 62]}

  2%|▏         | 19997/1000000 [11:50<9:30:59, 28.61it/s]global step 20000, trans_decision ep_re 126.59094275037691

{"global_step": 20000, "eval_re": [215.61762157194195, 95.50777337662997, 
90.02981984357368, 157.25489062414428, 105.85856278298311, 83.90897932474886, 
153.23189256695125, 134.68664065673573, 107.35512112397252, 122.45812563208781],
"eval_len": [44, 19, 18, 31, 21, 17, 30, 26, 21, 24]}

  3%|▎         | 29997/1000000 [19:21<9:24:54, 28.62it/s]global step 30000, trans_decision ep_re 211.24268232837758

{"global_step": 30000, "eval_re": [189.0915406907552, 139.4553264517372, 
90.26962292505176, 346.6965948375992, 90.22445718770898, 124.27400351825547, 
480.9628519828613, 83.93000663526176, 367.3828269724526, 200.1395920820921], 
"eval_len": [36, 27, 18, 62, 18, 24, 91, 17, 70, 42]}

  4%|▍         | 39998/1000000 [27:05<9:15:07, 28.82it/s]global step 40000, trans_decision ep_re 221.6320469420319

{"global_step": 40000, "eval_re": [424.35156707511817, 89.1337076135044, 
268.89039600850487, 141.45972020798598, 119.13492691654001, 151.2631244846336, 
89.68537178156896, 421.3818786978083, 112.45466336495613, 398.5651132696986], 
"eval_len": [81, 18, 56, 27, 24, 29, 18, 83, 22, 73]}

  5%|▍         | 49999/1000000 [35:00<9:13:26, 28.61it/s]global step 50000, trans_decision ep_re 209.15783728682453

{"global_step": 50000, "eval_re": [108.13659954680514, 125.1652577446874, 
344.8382949180429, 544.3524418514118, 113.3746282630157, 456.5083532009656, 
101.22967717564114, 88.95335812910007, 119.29658035376211, 89.72318168481365], 
"eval_len": [21, 24, 62, 99, 22, 83, 20, 18, 23, 18]}

  6%|▌         | 59998/1000000 [42:31<9:04:03, 28.80it/s]global step 60000, trans_decision ep_re 141.71531359596239

{"global_step": 60000, "eval_re": [292.0616947811174, 112.6204378442618, 
79.06006798566979, 117.50302460757443, 248.8342892113918, 132.49198548037847, 
130.61265003943532, 106.66786316490034, 84.16947517612283, 113.13164766877166], 
"eval_len": [54, 22, 16, 23, 46, 26, 25, 21, 17, 22]}

  7%|▋         | 69997/1000000 [50:21<9:14:09, 27.97it/s]global step 70000, trans_decision ep_re 246.86209895107964

{"global_step": 70000, "eval_re": [89.89809086430667, 144.56349525080196, 
371.8555350719741, 160.6001621570596, 89.02411400094755, 102.71116704768744, 
551.7135479136363, 480.7548678502657, 336.70837798431654, 140.79163136980074], 
"eval_len": [18, 28, 69, 31, 18, 20, 104, 87, 63, 27]}

  8%|▊         | 79999/1000000 [58:11<9:08:02, 27.98it/s]global step 80000, trans_decision ep_re 182.62510215644016

{"global_step": 80000, "eval_re": [119.46581652114206, 123.15621332926594, 
160.39715268529463, 301.25519352306816, 103.26174191952809, 150.72079172134673, 
83.95689901146017, 355.3595383891231, 106.95695081890666, 321.7207236452658], 
"eval_len": [23, 24, 31, 55, 20, 29, 17, 66, 21, 59]}

  9%|▉         | 89997/1000000 [1:06:10<9:03:56, 27.88it/s]global step 90000, trans_decision ep_re 200.83739644472993

{"global_step": 90000, "eval_re": [289.7053042799275, 464.52784068591393, 
124.19898048310549, 83.76444252952072, 95.95238236138286, 107.9551802953533, 
143.49550349826956, 111.18337994643393, 122.9623075093674, 464.6286428580248], 
"eval_len": [53, 89, 24, 17, 19, 21, 28, 22, 24, 90]}

 10%|▉         | 99999/1000000 [1:14:00<8:50:08, 28.29it/s]global step 100000, trans_decision ep_re 209.9001492159397

{"global_step": 100000, "eval_re": [268.25395224762036, 102.33887128435903, 
474.14009220731754, 170.50464489436118, 89.41063384129606, 284.4850778589318, 
277.6620826905607, 124.13528212806365, 102.54988479403845, 205.52097021284806], 
"eval_len": [54, 20, 91, 33, 18, 55, 51, 24, 20, 39]}

 11%|█         | 109998/1000000 [1:21:50<8:37:22, 28.67it/s]global step 110000, trans_decision ep_re 233.29394830227733

{"global_step": 110000, "eval_re": [83.9119658393724, 310.001439857216, 
386.5849026503716, 456.96664184281246, 89.65681174381454, 264.21773166944837, 
255.90078909141388, 289.1039830730872, 83.90515200233672, 112.69006525290037], 
"eval_len": [17, 68, 71, 86, 18, 50, 48, 52, 17, 22]}

 12%|█▏        | 119999/1000000 [1:29:25<8:41:03, 28.15it/s]global step 120000, trans_decision ep_re 256.4753187137427

{"global_step": 120000, "eval_re": [417.3555682152066, 89.98976136878831, 
83.92923393461676, 119.64147772738949, 343.42532577514714, 141.6060226646852, 
374.6758193538673, 311.53090745602367, 357.5489006657817, 325.0501699759205], 
"eval_len": [94, 18, 17, 24, 63, 28, 76, 60, 68, 63]}

 13%|█▎        | 129999/1000000 [1:37:13<8:32:10, 28.31it/s]global step 130000, trans_decision ep_re 212.86978759867816

{"global_step": 130000, "eval_re": [89.58255163044504, 118.32896734668289, 
320.87975800365365, 396.5842513151065, 90.41526312787944, 304.91560768727805, 
278.3770610221444, 319.4386282971291, 97.18683569984245, 112.98895185661968], 
"eval_len": [18, 23, 64, 73, 18, 56, 50, 59, 19, 22]}

 14%|█▍        | 139999/1000000 [1:45:01<8:27:05, 28.27it/s]global step 140000, trans_decision ep_re 157.2143740923653

{"global_step": 140000, "eval_re": [83.90009850085852, 101.86817752805969, 
129.45510711611354, 294.40634061623433, 277.9527977031666, 99.81956556115556, 
95.0710812410655, 88.78313507473717, 118.4409342959643, 282.4465032862978], 
"eval_len": [17, 20, 25, 55, 54, 20, 19, 18, 23, 52]}

 15%|█▍        | 149998/1000000 [1:53:00<8:14:44, 28.63it/s]global step 150000, trans_decision ep_re 225.4265542904543

{"global_step": 150000, "eval_re": [618.9850026308198, 141.82157604540208, 
96.80199770169163, 159.32911044543752, 129.3158145431647, 101.85275262711404, 
90.46517946361303, 100.19736681583086, 376.1397699217994, 439.3569727096699], 
"eval_len": [130, 27, 19, 31, 25, 20, 18, 20, 69, 81]}

 16%|█▌        | 159999/1000000 [2:00:36<8:14:09, 28.33it/s]global step 160000, trans_decision ep_re 210.8814114969288

{"global_step": 160000, "eval_re": [181.80124224147949, 96.07118583506686, 
89.57110098866119, 96.4677337955037, 317.595147234244, 89.2217188054413, 
304.9384188417974, 100.87695716695892, 445.37746756848594, 386.8931424916492], 
"eval_len": [35, 19, 18, 19, 59, 18, 56, 20, 82, 72]}

 17%|█▋        | 169998/1000000 [2:08:24<8:03:59, 28.58it/s]global step 170000, trans_decision ep_re 257.85751045872655

{"global_step": 170000, "eval_re": [322.1615135983533, 259.3984760272602, 
430.94395090674885, 316.4886324146634, 624.2251607647008, 125.59753078543285, 
138.616312956223, 163.24598622488892, 96.39275860523581, 101.50478230375842], 
"eval_len": [67, 47, 78, 57, 118, 24, 28, 31, 19, 20]}

 18%|█▊        | 179998/1000000 [2:16:13<7:57:05, 28.65it/s]global step 180000, trans_decision ep_re 162.83147141897908

{"global_step": 180000, "eval_re": [84.03635514633449, 84.01887223041766, 
242.67284058509972, 105.64539794043486, 89.29826585973271, 89.00739946736846, 
280.1825375858526, 112.97219031331971, 88.82793138952114, 451.6529236717096], 
"eval_len": [17, 17, 46, 21, 18, 18, 52, 22, 18, 83]}

 19%|█▉        | 189999/1000000 [2:24:00<7:57:57, 28.24it/s]global step 190000, trans_decision ep_re 213.9643616075062

{"global_step": 190000, "eval_re": [101.53135448115755, 113.62856825291719, 
100.57898978421905, 95.6819904026604, 83.85272422642242, 96.17974674579588, 
409.15415647147205, 96.37391296810289, 870.3365803710565, 172.325592371258], 
"eval_len": [20, 22, 20, 19, 17, 19, 74, 19, 170, 34]}

 20%|█▉        | 199999/1000000 [2:31:46<7:54:54, 28.08it/s]global step 200000, trans_decision ep_re 140.48182221190677

{"global_step": 200000, "eval_re": [96.00287222497545, 89.31464489334843, 
118.8008837660471, 128.6300111585281, 118.85135008612194, 163.95792760325483, 
392.35728871711945, 124.17748577621013, 83.864531345953, 88.8612265475092], 
"eval_len": [19, 18, 23, 25, 23, 31, 71, 24, 17, 18]}

 21%|██        | 209998/1000000 [2:39:34<7:44:29, 28.35it/s]global step 210000, trans_decision ep_re 229.85752002583018

{"global_step": 210000, "eval_re": [101.78467828904256, 102.39815261548843, 
100.63708419129223, 364.5289067665879, 89.843562321847, 423.5840010508102, 
342.23356269429047, 297.20392927169195, 315.44778732069403, 160.91353573655732],
"eval_len": [20, 20, 20, 67, 18, 82, 65, 57, 61, 32]}

 22%|██▏       | 219999/1000000 [2:47:30<7:40:39, 28.22it/s]global step 220000, trans_decision ep_re 166.4175337465686

{"global_step": 220000, "eval_re": [95.56969708837183, 166.24722323011468, 
111.98404203700476, 97.06463851594349, 271.2137537175778, 89.5789363338747, 
102.21291026432614, 108.19049657656872, 364.39423159006077, 257.7194081118433], 
"eval_len": [19, 33, 22, 19, 54, 18, 20, 21, 70, 46]}

 23%|██▎       | 229999/1000000 [2:55:03<7:32:41, 28.35it/s]global step 230000, trans_decision ep_re 292.05437115633015

{"global_step": 230000, "eval_re": [302.78975866014315, 84.34198245840106, 
364.0261632482109, 107.47103951213734, 124.09797517653585, 304.71543666089724, 
273.03115917609233, 250.37049771804266, 685.8231560350719, 423.87654291776914], 
"eval_len": [54, 17, 65, 21, 24, 57, 51, 50, 128, 78]}

 24%|██▍       | 239997/1000000 [3:02:47<7:45:39, 27.20it/s]global step 240000, trans_decision ep_re 204.46690559043776

{"global_step": 240000, "eval_re": [347.561602852943, 198.46397060393016, 
397.8530309939764, 188.39453945877725, 101.75348996326558, 397.86434680425106, 
89.4027581093366, 89.07418806110834, 83.917797569813, 150.38333148697606], 
"eval_len": [64, 37, 74, 37, 20, 72, 18, 18, 17, 29]}

 25%|██▍       | 249998/1000000 [3:10:50<7:27:00, 27.96it/s]global step 250000, trans_decision ep_re 211.76226017565892

{"global_step": 250000, "eval_re": [314.9174194256233, 388.51203932557667, 
119.00222902255285, 151.00829717560998, 167.43376887640977, 133.6357825482024, 
263.099523339463, 369.2782329024086, 102.52095573982122, 108.21435340092127], 
"eval_len": [59, 70, 23, 29, 32, 26, 54, 68, 20, 21]}

 26%|██▌       | 259999/1000000 [3:18:32<7:28:49, 27.48it/s]global step 260000, trans_decision ep_re 230.7149123787482

{"global_step": 260000, "eval_re": [89.04414741044151, 136.13416497933562, 
143.93247288058873, 140.1786061233138, 257.03507769602675, 291.8509696610303, 
393.3170379617017, 200.24726278513097, 300.90317443346845, 354.5062098564441], 
"eval_len": [18, 26, 28, 27, 47, 54, 73, 37, 57, 66]}

 27%|██▋       | 269999/1000000 [3:26:26<7:14:10, 28.02it/s]global step 270000, trans_decision ep_re 173.53714143064127

{"global_step": 270000, "eval_re": [309.31237964133055, 112.77280107586516, 
139.9875537055635, 95.59094880325569, 89.18777084994858, 102.23985336489983, 
127.40809400107133, 331.5331401251149, 106.57543175583466, 320.7634409835284], 
"eval_len": [69, 22, 27, 19, 18, 20, 25, 70, 21, 59]}

 28%|██▊       | 279998/1000000 [3:34:30<7:02:21, 28.41it/s]global step 280000, trans_decision ep_re 258.6607088470022

{"global_step": 280000, "eval_re": [332.36012348285794, 95.34587031223872, 
103.27678755329347, 266.6679844888705, 418.0500026200911, 132.52631895314573, 
365.86575328368895, 417.54200912648355, 107.83820866742272, 347.1340299819291], 
"eval_len": [61, 19, 20, 53, 90, 26, 67, 80, 21, 63]}

 29%|██▉       | 289998/1000000 [3:42:10<7:01:59, 28.04it/s]global step 290000, trans_decision ep_re 342.1921248890997

{"global_step": 290000, "eval_re": [449.0756939061466, 295.8960634115279, 
338.0888273934147, 333.655217955954, 255.3613582121238, 447.45203828198146, 
217.96426159052527, 746.0671874717065, 249.09381142792853, 89.26678923968862], 
"eval_len": [81, 56, 62, 73, 54, 87, 44, 135, 47, 18]}

 30%|██▉       | 299997/1000000 [3:50:02<6:56:30, 28.01it/s]global step 300000, trans_decision ep_re 225.72376414197282

{"global_step": 300000, "eval_re": [89.0363179105408, 351.1318415187612, 
175.9365700914099, 162.13195792789722, 133.28201944067695, 89.85214104476285, 
101.55458397247247, 626.0752925950311, 444.24927397005604, 83.9876429481203], 
"eval_len": [18, 70, 34, 31, 27, 18, 20, 118, 81, 17]}

 31%|███       | 309997/1000000 [3:57:54<6:49:35, 28.08it/s]global step 310000, trans_decision ep_re 213.35702109314624

{"global_step": 310000, "eval_re": [118.5851019048036, 317.9900176583565, 
100.07715079768275, 370.4676673006415, 108.13305716467676, 292.3230957855137, 
102.51675210148389, 324.6072534352915, 107.22968528488751, 291.640429498125], 
"eval_len": [23, 62, 20, 68, 21, 56, 20, 59, 21, 54]}

 32%|███▏      | 319997/1000000 [4:06:00<6:47:27, 27.81it/s]global step 320000, trans_decision ep_re 161.64512653761437

{"global_step": 320000, "eval_re": [129.28103106357844, 166.15258098235975, 
338.3461038053557, 84.55801318037048, 112.156623808889, 155.6430109111414, 
279.5527105376801, 89.78883456160897, 143.21556629522627, 117.75679022993356], 
"eval_len": [25, 32, 66, 17, 22, 30, 52, 18, 28, 23]}

 33%|███▎      | 329997/1000000 [4:13:42<6:39:33, 27.95it/s]global step 330000, trans_decision ep_re 263.503563982815

{"global_step": 330000, "eval_re": [220.44434395612708, 496.71905148828483, 
327.3072795020934, 79.0350231974675, 154.48364924527047, 143.01386852591884, 
133.06720918156518, 394.22949154679975, 361.1986620217291, 325.5370611628938], 
"eval_len": [41, 93, 63, 16, 29, 28, 26, 74, 65, 63]}

 34%|███▍      | 339999/1000000 [4:21:34<6:28:54, 28.28it/s]global step 340000, trans_decision ep_re 230.35276180238483

{"global_step": 340000, "eval_re": [118.92749788449193, 119.10212994237152, 
256.3672629470805, 90.04071365798613, 90.08679014828861, 161.7581876115707, 
417.23257530447347, 95.3825532100815, 476.20122211286036, 478.42868520464333], 
"eval_len": [23, 23, 49, 18, 18, 31, 75, 19, 84, 88]}

 35%|███▍      | 349999/1000000 [4:29:40<6:26:01, 28.06it/s]global step 350000, trans_decision ep_re 277.1120548144561

{"global_step": 350000, "eval_re": [364.7113076410591, 103.90885449192405, 
625.105389076874, 106.10445381153687, 102.10695040321116, 302.4353113555794, 
348.30063270994464, 212.63561584878207, 101.591363766273, 504.22066903937696], 
"eval_len": [70, 21, 123, 21, 20, 57, 65, 42, 20, 99]}

 36%|███▌      | 359998/1000000 [4:37:20<6:21:02, 27.99it/s]global step 360000, trans_decision ep_re 215.50706791937372

{"global_step": 360000, "eval_re": [106.67339961262961, 335.3082606405163, 
83.83163895125236, 312.1420814595487, 113.27447763632757, 99.62965863241737, 
377.1714368469594, 329.0769660596895, 302.305314567305, 95.65744478709165], 
"eval_len": [21, 65, 17, 58, 22, 20, 69, 65, 59, 19]}

 37%|███▋      | 369997/1000000 [4:45:12<6:16:42, 27.87it/s]global step 370000, trans_decision ep_re 189.02861591824893

{"global_step": 370000, "eval_re": [119.41428749529362, 83.93746823166421, 
96.22913897145659, 354.24323892323537, 327.20411229378146, 235.79618467556563, 
363.6428174996594, 118.5143928635145, 95.94304671186437, 95.36147151645417], 
"eval_len": [23, 17, 19, 72, 62, 46, 71, 23, 19, 19]}

 38%|███▊      | 379998/1000000 [4:53:05<6:08:30, 28.04it/s]global step 380000, trans_decision ep_re 218.39523282378732

{"global_step": 380000, "eval_re": [123.62526998058544, 101.94882609177785, 
95.37359030571866, 95.99195670005753, 329.25604133730695, 280.3947607636405, 
380.22070144363346, 284.77347994978726, 348.6423341973147, 143.725367468051], 
"eval_len": [24, 20, 19, 19, 58, 50, 71, 55, 68, 28]}

 39%|███▉      | 389999/1000000 [5:01:00<6:02:55, 28.01it/s]global step 390000, trans_decision ep_re 218.56521615410708

{"global_step": 390000, "eval_re": [101.97361112593866, 254.1269378717264, 
547.8663126209557, 409.91189224927916, 100.75027569480511, 106.9447101874091, 
84.03194648700726, 101.55298198129698, 120.5395481774877, 357.95394514516477], 
"eval_len": [20, 48, 114, 75, 20, 21, 17, 20, 24, 67]}

 40%|███▉      | 399999/1000000 [5:08:54<6:00:50, 27.71it/s]global step 400000, trans_decision ep_re 259.2610839385744

{"global_step": 400000, "eval_re": [103.2466128562039, 107.77465946103999, 
96.0793444985829, 95.6676977220094, 306.4357324437541, 329.6588496719911, 
332.93125157240416, 380.1199513981368, 438.3282022627468, 402.3685374988743], 
"eval_len": [20, 21, 19, 19, 55, 70, 63, 68, 80, 75]}

 41%|████      | 409997/1000000 [5:17:00<5:53:47, 27.79it/s]global step 410000, trans_decision ep_re 246.19162639630167

{"global_step": 410000, "eval_re": [137.18563087948053, 485.0015749021428, 
96.82637067832951, 162.38037660629365, 119.84436554971404, 313.15632935625945, 
88.81686926712858, 350.595683861471, 90.71647470610445, 617.3925881560929], 
"eval_len": [27, 88, 19, 31, 23, 65, 18, 68, 18, 120]}

 42%|████▏     | 419999/1000000 [5:24:40<5:45:24, 27.99it/s]global step 420000, trans_decision ep_re 256.6091831172269

{"global_step": 420000, "eval_re": [117.64200032558381, 90.20742586233892, 
123.33990284321068, 84.04079124819934, 434.21138008828933, 337.62558938769666, 
165.5686412122525, 474.31270359883996, 331.4459890684975, 407.6974075373601], 
"eval_len": [23, 18, 24, 17, 79, 62, 32, 94, 60, 73]}

 43%|████▎     | 429997/1000000 [5:32:31<5:39:31, 27.98it/s]global step 430000, trans_decision ep_re 122.57480223314674

{"global_step": 430000, "eval_re": [155.8243714476068, 127.44148263390349, 
106.81422790569665, 89.8258016041518, 94.66222755991187, 105.53463291685668, 
157.96114625581822, 164.65995971990097, 102.42084054875328, 120.6033317388675], 
"eval_len": [30, 25, 21, 18, 19, 21, 31, 31, 20, 23]}

 44%|████▍     | 439999/1000000 [5:40:25<5:40:30, 27.41it/s]global step 440000, trans_decision ep_re 266.0204878113101

{"global_step": 440000, "eval_re": [97.68100062912998, 89.63572572703676, 
274.178887345015, 89.82964792200359, 159.9996054283339, 150.0340312703443, 
337.35766401311776, 476.09517470878853, 375.2660242873805, 610.1271167819505], 
"eval_len": [20, 18, 49, 18, 31, 29, 64, 99, 78, 122]}

 45%|████▍     | 449997/1000000 [5:48:20<5:35:59, 27.28it/s]global step 450000, trans_decision ep_re 212.32533478071088

{"global_step": 450000, "eval_re": [88.72419425585237, 166.834269377179, 
119.91410571522323, 107.82138535598895, 540.332050560956, 89.10574036305697, 
274.75319662378064, 537.5101941764091, 102.23628137220398, 96.0219300064586], 
"eval_len": [18, 33, 23, 21, 105, 18, 54, 91, 20, 19]}

 46%|████▌     | 459997/1000000 [5:56:30<5:30:11, 27.26it/s]global step 460000, trans_decision ep_re 172.06991010229575

{"global_step": 460000, "eval_re": [444.78080955848736, 175.3881260245438, 
95.22999917558714, 95.5543970269551, 83.82656329995453, 95.43231406552441, 
125.04478432777681, 403.10668941584146, 113.31169175758741, 89.0237263706994], 
"eval_len": [92, 33, 19, 19, 17, 19, 25, 75, 22, 18]}

 47%|████▋     | 469998/1000000 [6:04:20<5:10:50, 28.42it/s]global step 470000, trans_decision ep_re 277.294918707055

{"global_step": 470000, "eval_re": [549.6184393735684, 95.4025172456761, 
173.76695760082518, 420.5943783812751, 606.7361379938454, 154.62381549215527, 
88.74420053612066, 118.41049046071218, 95.77387889397689, 469.27837109239437], 
"eval_len": [113, 19, 33, 87, 114, 29, 18, 23, 19, 83]}

 48%|████▊     | 479999/1000000 [6:12:00<5:10:19, 27.93it/s]global step 480000, trans_decision ep_re 200.41571394729118

{"global_step": 480000, "eval_re": [148.28165504023136, 113.85226420340717, 
117.82882272000761, 106.48788288584647, 96.15324062810363, 419.75023344323984, 
105.7294234991128, 483.3177537052942, 328.8254051638377, 83.93045818383119], 
"eval_len": [28, 22, 23, 21, 19, 76, 21, 87, 63, 17]}

 49%|████▉     | 489998/1000000 [6:19:50<5:02:46, 28.07it/s]global step 490000, trans_decision ep_re 203.4202187641536

{"global_step": 490000, "eval_re": [101.53884431937368, 129.40785064636947, 
321.3502407688782, 324.677123861423, 282.4438585127687, 377.00980689388024, 
163.80480630294593, 94.94093049340489, 110.569033792842, 128.4596920496496], 
"eval_len": [20, 25, 62, 61, 55, 69, 31, 19, 22, 25]}

 50%|████▉     | 499997/1000000 [6:27:41<4:58:10, 27.95it/s]global step 500000, trans_decision ep_re 316.8978404407901

{"global_step": 500000, "eval_re": [333.509663993137, 370.7778666297665, 
84.80020640400541, 371.84707199332024, 102.70358476493378, 240.86674295505645, 
394.5515974746501, 484.91019620319474, 366.82615300964676, 418.18532098018994], 
"eval_len": [62, 68, 17, 68, 20, 48, 70, 89, 70, 70]}

 51%|█████     | 509999/1000000 [6:35:37<4:59:32, 27.26it/s]global step 510000, trans_decision ep_re 210.79375745380176

{"global_step": 510000, "eval_re": [575.3072405686407, 502.63759774394356, 
147.1543226354444, 300.88416718273555, 119.9271281946625, 90.5902575832037, 
102.43265667589662, 88.9937005675567, 95.45429820744917, 84.55620517848449], 
"eval_len": [106, 97, 29, 59, 23, 18, 20, 18, 19, 17]}

 52%|█████▏    | 519999/1000000 [6:43:31<4:49:32, 27.63it/s]global step 520000, trans_decision ep_re 285.0337580032221

{"global_step": 520000, "eval_re": [142.50061255017008, 132.66203590038424, 
107.25333238581548, 469.77120547354474, 341.0761721193359, 323.52817612808235, 
165.8603188617474, 247.91160340223757, 307.6026849799859, 612.1714382309174], 
"eval_len": [28, 26, 21, 81, 73, 71, 32, 47, 68, 109]}

 53%|█████▎    | 529997/1000000 [6:51:23<4:41:15, 27.85it/s]global step 530000, trans_decision ep_re 215.18037930952613

{"global_step": 530000, "eval_re": [254.79048350813775, 423.0962733237834, 
362.14031685043574, 94.78147238854697, 117.72265654559502, 100.58908201016888, 
416.96036224093206, 116.6516891597044, 101.71974239693229, 163.3517146710246], 
"eval_len": [51, 78, 65, 19, 23, 20, 79, 23, 20, 32]}

 54%|█████▍    | 539997/1000000 [6:59:14<4:35:57, 27.78it/s]global step 540000, trans_decision ep_re 242.12051405927383

{"global_step": 540000, "eval_re": [148.81280156586925, 177.5850654847217, 
131.3966229638873, 596.7272229502755, 89.44649077175319, 100.82638619521018, 
100.99380390288974, 490.40835529722125, 315.4188978623158, 269.58949359859395], 
"eval_len": [29, 33, 25, 109, 18, 20, 20, 85, 57, 50]}

 55%|█████▍    | 549998/1000000 [7:07:05<4:24:46, 28.33it/s]global step 550000, trans_decision ep_re 253.4615814204214

{"global_step": 550000, "eval_re": [101.83578960957404, 356.55816871946695, 
296.87341589508435, 417.89529511432636, 89.61444328434877, 103.02945026928204, 
95.49542098765983, 372.61575049172734, 156.01677138060757, 544.6813084521367], 
"eval_len": [20, 66, 54, 85, 18, 20, 19, 71, 31, 100]}

 56%|█████▌    | 559999/1000000 [7:14:56<4:23:01, 27.88it/s]global step 560000, trans_decision ep_re 215.3522296502771

{"global_step": 560000, "eval_re": [371.7503646070654, 90.2552468836201, 
223.506062193728, 88.89590057386133, 142.1279568676128, 356.2946698526212, 
103.40046083949352, 107.6755463276441, 544.6493001871521, 124.96678816997262], 
"eval_len": [66, 18, 46, 18, 28, 71, 20, 21, 101, 24]}

 57%|█████▋    | 569997/1000000 [7:22:45<4:16:32, 27.94it/s]global step 570000, trans_decision ep_re 229.90871968166363

{"global_step": 570000, "eval_re": [172.05420072537405, 107.72415512227158, 
112.17047067020418, 722.8793200935146, 88.88792822005033, 95.73880067037184, 
118.29298134559124, 89.26044952735168, 328.56577100967274, 463.51311943223374], 
"eval_len": [33, 21, 22, 140, 18, 19, 23, 18, 64, 84]}

 58%|█████▊    | 579999/1000000 [7:30:36<4:10:10, 27.98it/s]global step 580000, trans_decision ep_re 178.3471040956878

{"global_step": 580000, "eval_re": [113.31822001082443, 119.01051395165551, 
84.02464914150838, 348.67542276029354, 100.718689478731, 409.1717444615632, 
95.17264997074722, 292.4049132003043, 130.9730492740398, 90.00118870721045], 
"eval_len": [22, 23, 17, 68, 20, 74, 19, 60, 25, 18]}

 59%|█████▉    | 589999/1000000 [7:38:26<4:03:31, 28.06it/s]global step 590000, trans_decision ep_re 324.56289466166777

{"global_step": 590000, "eval_re": [117.3064566383969, 172.33368329040172, 
96.61001927990674, 608.9615572945642, 764.5287304746407, 100.87278280851906, 
435.4315587857582, 292.4596307098838, 514.1105010520691, 143.01402628253715], 
"eval_len": [23, 32, 19, 111, 149, 20, 79, 55, 98, 28]}

 60%|█████▉    | 599997/1000000 [7:46:17<3:58:06, 28.00it/s]global step 600000, trans_decision ep_re 203.6627676125527

{"global_step": 600000, "eval_re": [139.28891726117934, 140.7173115618638, 
127.18133495370188, 319.0059816652319, 291.6702942939972, 89.82415608458815, 
263.34627867368397, 261.1913835468464, 101.85508963222053, 302.5469284522137], 
"eval_len": [27, 27, 25, 59, 53, 18, 48, 48, 20, 62]}

 61%|██████    | 609999/1000000 [7:54:06<3:53:44, 27.81it/s]global step 610000, trans_decision ep_re 349.34818363885125

{"global_step": 610000, "eval_re": [861.3486473324531, 419.9641590039006, 
132.34955624037, 141.99056060430823, 669.6651628249373, 362.34807788539894, 
141.56354473290133, 359.12476123916434, 102.33027377087754, 302.7970927542008], 
"eval_len": [160, 79, 26, 27, 119, 77, 27, 73, 20, 54]}

 62%|██████▏   | 619997/1000000 [8:02:10<3:47:41, 27.82it/s]global step 620000, trans_decision ep_re 246.89640838500955

{"global_step": 620000, "eval_re": [141.909690104054, 269.9334186273148, 
390.76839504302745, 97.1396527471234, 475.85207643313544, 95.98905944332819, 
279.1376301694239, 83.9201490482062, 123.3696021522342, 510.9444100822477], 
"eval_len": [27, 49, 72, 19, 93, 19, 61, 17, 24, 90]}

 63%|██████▎   | 629998/1000000 [8:10:00<3:38:33, 28.21it/s]global step 630000, trans_decision ep_re 260.3761939821528

{"global_step": 630000, "eval_re": [90.0603338381363, 418.7357827744675, 
526.0415409224129, 100.80680215568337, 533.0650268426699, 367.68284904289897, 
125.2791637579986, 203.7335980399347, 136.6242850151201, 101.73255743220592], 
"eval_len": [18, 78, 100, 20, 103, 80, 24, 38, 26, 20]}

 64%|██████▍   | 639997/1000000 [8:17:50<3:34:12, 28.01it/s]global step 640000, trans_decision ep_re 245.5351454934793

{"global_step": 640000, "eval_re": [371.4076936494222, 208.31396012060213, 
433.8120803275584, 113.14899439791522, 394.2095702098523, 340.2593589447753, 
273.38680089264443, 106.34103250474894, 95.91749519311225, 118.55446869416156], 
"eval_len": [70, 42, 77, 22, 70, 64, 50, 21, 19, 23]}

 65%|██████▍   | 649998/1000000 [8:25:40<3:24:54, 28.47it/s]global step 650000, trans_decision ep_re 235.00813507571283

{"global_step": 650000, "eval_re": [356.1727226880726, 379.2411393067911, 
141.40743696903584, 193.17550851323386, 535.3402237745161, 305.30368115519707, 
128.12667367414244, 108.35033220134032, 113.58851239359896, 89.37512008119998], 
"eval_len": [67, 70, 27, 37, 99, 54, 25, 21, 22, 18]}

 66%|██████▌   | 659999/1000000 [8:33:30<3:21:58, 28.06it/s]global step 660000, trans_decision ep_re 158.24365820868644

{"global_step": 660000, "eval_re": [113.97569834122665, 151.3614592608165, 
140.8348020548731, 206.2989859443138, 129.9384677876413, 123.34046148741643, 
127.34177331916752, 348.61679020078736, 138.82745652480892, 101.90068716581271],
"eval_len": [23, 29, 27, 42, 25, 24, 25, 65, 27, 20]}

 67%|██████▋   | 669999/1000000 [8:41:20<3:17:41, 27.82it/s]global step 670000, trans_decision ep_re 279.3003150025958

{"global_step": 670000, "eval_re": [107.12311598405351, 111.99018400651894, 
439.4074357914622, 89.66213403101254, 106.54869176390838, 467.6955860052092, 
294.5247585881488, 524.7386675920877, 101.09609779662874, 550.2164784669286], 
"eval_len": [21, 22, 82, 18, 21, 90, 52, 91, 20, 104]}

 68%|██████▊   | 679999/1000000 [8:49:10<3:11:42, 27.82it/s]global step 680000, trans_decision ep_re 266.80310492744303

{"global_step": 680000, "eval_re": [96.34867818512582, 548.753765521974, 
95.9332833965527, 350.9125240629366, 489.79378272397037, 107.62415270598363, 
89.66012338085355, 121.86851249676309, 637.4674925974257, 129.6687342028447], 
"eval_len": [19, 100, 19, 69, 96, 21, 18, 24, 126, 25]}

 69%|██████▉   | 689997/1000000 [8:56:53<3:06:43, 27.67it/s]global step 690000, trans_decision ep_re 354.9941609687089

{"global_step": 690000, "eval_re": [618.9985985669695, 346.05058914610413, 
102.39124034451825, 218.20702032418578, 450.6315337624782, 347.8165510000327, 
339.9156704373616, 437.3331572296706, 347.82975457415193, 340.7674943016161], 
"eval_len": [115, 64, 20, 41, 86, 66, 65, 79, 65, 68]}

 70%|██████▉   | 699998/1000000 [9:04:44<2:56:31, 28.32it/s]global step 700000, trans_decision ep_re 184.94283678015265

{"global_step": 700000, "eval_re": [139.97804281575858, 96.4677355200574, 
89.19462866665937, 95.26124636159751, 349.07261297702803, 468.5218663571745, 
102.50771678410104, 254.15927443394247, 157.18269651455955, 97.08254737064787], 
"eval_len": [27, 19, 18, 19, 64, 88, 20, 50, 30, 19]}

 71%|███████   | 709999/1000000 [9:12:36<2:52:15, 28.06it/s]global step 710000, trans_decision ep_re 287.309075194483

{"global_step": 710000, "eval_re": [571.5489580131728, 160.31627561521253, 
118.78854689401625, 100.97125159196155, 129.1214415586534, 783.1888211333899, 
124.80314639639742, 317.8021182774363, 277.83328284192737, 288.71690962266206], 
"eval_len": [102, 31, 23, 20, 25, 159, 25, 59, 51, 60]}

 72%|███████▏  | 719998/1000000 [9:20:27<2:43:56, 28.46it/s]global step 720000, trans_decision ep_re 233.19246840985025

{"global_step": 720000, "eval_re": [502.7177703814774, 94.8799057960627, 
138.71301206201403, 102.22192225659559, 89.38003926935443, 529.4555621747612, 
245.32827229750248, 414.4952015486522, 101.70842776621733, 113.02457054586522], 
"eval_len": [98, 19, 27, 20, 18, 103, 48, 72, 20, 22]}

 73%|███████▎  | 729997/1000000 [9:28:30<2:39:02, 28.30it/s]global step 730000, trans_decision ep_re 275.91386872282226

{"global_step": 730000, "eval_re": [144.3083107483912, 83.91666163690407, 
542.2718957877441, 107.35804068013846, 130.91082303588027, 713.6149495972226, 
429.7785756214256, 123.3717099011783, 89.05407897824631, 394.55364124109144], 
"eval_len": [28, 17, 95, 21, 25, 127, 77, 24, 18, 77]}

 74%|███████▍  | 739998/1000000 [9:36:07<2:32:34, 28.40it/s]global step 740000, trans_decision ep_re 220.55212463460958

{"global_step": 740000, "eval_re": [345.84303689322263, 168.16139759409427, 
96.19397541501453, 105.80540814373096, 645.447511521694, 444.26532669179784, 
84.02458702129151, 113.74173101479846, 112.61782320298587, 89.42044884746576], 
"eval_len": [67, 32, 19, 21, 121, 81, 17, 22, 22, 18]}

 75%|███████▍  | 749998/1000000 [9:43:57<2:26:00, 28.54it/s]global step 750000, trans_decision ep_re 287.7617846758567

{"global_step": 750000, "eval_re": [547.1090115966462, 119.27183483349495, 
310.4613309523671, 133.769829308475, 526.5883395383946, 128.58717557510832, 
128.4165083096604, 380.2744686792483, 463.2545632956206, 139.88478466955166], 
"eval_len": [98, 23, 59, 26, 108, 25, 25, 68, 80, 27]}

 76%|███████▌  | 759998/1000000 [9:52:00<2:20:40, 28.43it/s]global step 760000, trans_decision ep_re 275.98820183418417

{"global_step": 760000, "eval_re": [111.98923511851564, 540.9843110255288, 
115.45422620576663, 100.46069298523038, 286.6423654778526, 511.5412271333877, 
357.15129112933545, 277.6592431414519, 311.6500711282865, 146.34935499648583], 
"eval_len": [22, 99, 23, 20, 52, 97, 66, 53, 57, 28]}

 77%|███████▋  | 769999/1000000 [9:59:50<2:18:09, 27.75it/s]global step 770000, trans_decision ep_re 217.2816526883396

{"global_step": 770000, "eval_re": [162.52439130030675, 106.54302607438721, 
258.41840675407906, 282.8835469278312, 122.63681636524306, 83.88416147002258, 
603.3573204499467, 316.2031188183664, 133.53076830974763, 102.83497041346533], 
"eval_len": [32, 21, 54, 58, 24, 17, 116, 63, 26, 20]}

 78%|███████▊  | 779998/1000000 [10:07:40<2:09:09, 28.39it/s]global step 780000, trans_decision ep_re 271.6132273873063

{"global_step": 780000, "eval_re": [89.50049940854811, 474.05261923694934, 
198.74493481757162, 146.70908914253982, 400.8245197527082, 118.7995068804744, 
83.74478765613654, 310.146960115092, 278.92835064686926, 614.6810062161741], 
"eval_len": [18, 88, 38, 28, 75, 23, 17, 59, 53, 105]}

 79%|███████▉  | 789997/1000000 [10:15:21<2:04:12, 28.18it/s]global step 790000, trans_decision ep_re 227.36696604978042

{"global_step": 790000, "eval_re": [399.85924316291715, 414.2214425941923, 
83.90602046344851, 117.48229619352928, 143.66608752838266, 288.3848560617571, 
202.6086297222634, 137.52420636894712, 330.34281418038785, 155.6740642219788], 
"eval_len": [81, 75, 17, 23, 28, 56, 40, 28, 59, 29]}

 80%|███████▉  | 799997/1000000 [10:23:11<1:59:22, 27.92it/s]global step 800000, trans_decision ep_re 288.63346684088344

{"global_step": 800000, "eval_re": [88.95499982124285, 89.7595513756917, 
93.80966183742417, 271.8065513332098, 747.4128715036808, 191.83614437607167, 
442.7442186642184, 102.18750809946033, 592.2801691223921, 265.5429922754425], 
"eval_len": [18, 18, 19, 51, 142, 36, 80, 20, 114, 49]}

 81%|████████  | 809997/1000000 [10:31:03<1:53:51, 27.81it/s]global step 810000, trans_decision ep_re 342.23338995666785

{"global_step": 810000, "eval_re": [700.6860839395121, 139.5873512013217, 
118.71196360501247, 756.0398787959813, 88.8990146587522, 95.87765715514382, 
391.29240226594027, 102.28401564049463, 448.8812639180022, 580.0742683865177], 
"eval_len": [141, 27, 23, 149, 18, 19, 72, 20, 95, 103]}

 82%|████████▏ | 819999/1000000 [10:38:55<1:47:39, 27.87it/s]global step 820000, trans_decision ep_re 245.1472114881361

{"global_step": 820000, "eval_re": [102.00386145190261, 172.98556482530813, 
195.96623437027586, 477.4593391295535, 584.2742913330255, 449.41994891282303, 
89.62155970281617, 160.09893734487613, 123.36777504040344, 96.2746027703767], 
"eval_len": [20, 33, 36, 87, 105, 81, 18, 32, 24, 19]}

 83%|████████▎ | 829999/1000000 [10:46:47<1:40:43, 28.13it/s]global step 830000, trans_decision ep_re 207.98944383407178

{"global_step": 830000, "eval_re": [108.45377819644635, 105.09264130505848, 
321.89880218419603, 174.15678407082527, 385.9953386143384, 107.55189886178042, 
88.80883352729998, 83.94620525361698, 88.84490983760423, 615.1452464895516], 
"eval_len": [21, 21, 58, 34, 83, 21, 18, 17, 18, 114]}

 84%|████████▍ | 839999/1000000 [10:54:50<1:35:11, 28.01it/s]global step 840000, trans_decision ep_re 230.45852356059714

{"global_step": 840000, "eval_re": [113.42190452623916, 134.67066661686115, 
530.648851159951, 89.32878548431887, 495.0353574173652, 102.24301594116596, 
410.7269848276095, 116.09992903846624, 228.4466924940474, 83.96304809994687], 
"eval_len": [22, 26, 95, 18, 86, 20, 81, 23, 43, 17]}

 85%|████████▍ | 849997/1000000 [11:02:30<1:29:35, 27.90it/s]global step 850000, trans_decision ep_re 201.92555530828778

{"global_step": 850000, "eval_re": [107.96757284586255, 127.20745779476464, 
125.91570613159831, 591.8818082413179, 159.46690436974382, 380.11057008755535, 
101.96122832268672, 110.49175496093673, 223.66701975777605, 90.58553057063605], 
"eval_len": [21, 25, 25, 112, 31, 77, 20, 22, 43, 18]}

 86%|████████▌ | 859998/1000000 [11:10:22<1:21:42, 28.56it/s]global step 860000, trans_decision ep_re 213.43467955403375

{"global_step": 860000, "eval_re": [106.18799975649206, 89.45845633181679, 
286.43896536795944, 114.27158429913743, 652.0326331599097, 83.91012271213638, 
138.765520796249, 108.45813246088699, 191.34894838699847, 363.4744322687514], 
"eval_len": [21, 18, 51, 22, 115, 17, 27, 21, 37, 67]}

 87%|████████▋ | 869998/1000000 [11:18:14<1:16:48, 28.21it/s]global step 870000, trans_decision ep_re 190.16382987331446

{"global_step": 870000, "eval_re": [690.4097726934223, 107.36857520379534, 
96.33760084360719, 88.6837008497694, 119.767558138689, 96.38527108713913, 
184.72591826512024, 281.116865327004, 94.47914069743078, 142.36389562716735], 
"eval_len": [120, 21, 19, 18, 24, 19, 36, 52, 19, 28]}

 88%|████████▊ | 879999/1000000 [11:26:06<1:12:21, 27.64it/s]global step 880000, trans_decision ep_re 201.65258305786264

{"global_step": 880000, "eval_re": [95.950461368795, 620.2791284880967, 
156.79416238089442, 427.95893801071486, 195.57561100138062, 89.2300044057839, 
95.71636491780421, 89.48897498672346, 156.77101641830885, 88.76116860012428], 
"eval_len": [19, 107, 30, 83, 39, 18, 19, 18, 30, 18]}

 89%|████████▉ | 889997/1000000 [11:33:56<1:04:45, 28.31it/s]global step 890000, trans_decision ep_re 233.0024104811833

{"global_step": 890000, "eval_re": [130.06937026803354, 116.92403643736942, 
916.6338387055944, 331.49450936551705, 105.56993492931326, 90.81334014538666, 
354.7741865070815, 95.39139584846582, 83.79327950070497, 104.56021310436626], 
"eval_len": [25, 23, 171, 61, 21, 18, 64, 19, 17, 21]}

 90%|████████▉ | 899999/1000000 [11:41:46<59:31, 28.00it/s]global step 900000, trans_decision ep_re 175.2020733401418

{"global_step": 900000, "eval_re": [101.94468224727945, 83.8743751083886, 
426.21686701225104, 89.02313950873443, 123.01825877095996, 108.37583525360596, 
130.22557195673176, 105.80432291589099, 414.7317483530787, 168.80593227449714], 
"eval_len": [20, 17, 75, 18, 24, 21, 25, 21, 81, 32]}

 91%|█████████ | 909998/1000000 [11:49:36<52:59, 28.31it/s]global step 910000, trans_decision ep_re 136.51737677730523

{"global_step": 910000, "eval_re": [89.59165824335051, 101.17291038892245, 
84.16611660082157, 459.6684384657824, 95.38187047136975, 110.82071106907838, 
96.25725679922803, 101.95386806401825, 113.02714555581275, 113.1337921146683], 
"eval_len": [18, 20, 17, 86, 19, 22, 19, 20, 22, 22]}

 92%|█████████▏| 919999/1000000 [11:57:26<47:40, 27.96it/s]global step 920000, trans_decision ep_re 243.88337840820986

{"global_step": 920000, "eval_re": [552.1014025259766, 105.26964518398852, 
96.31687665715454, 101.83357052324848, 441.34946122512207, 83.87122046012989, 
554.7344029560334, 83.87259352890652, 329.07944376939184, 90.40516725214678], 
"eval_len": [113, 21, 19, 20, 76, 17, 98, 17, 69, 18]}

 93%|█████████▎| 929999/1000000 [12:05:30<41:44, 27.95it/s]global step 930000, trans_decision ep_re 306.57629024738316

{"global_step": 930000, "eval_re": [330.10209292118896, 309.6246025636252, 
95.44529216955519, 413.3046960288706, 586.234187597041, 486.33398508287087, 
313.33664206248886, 255.02402421981427, 96.08807992671406, 180.26929990166286], 
"eval_len": [72, 61, 19, 77, 107, 91, 56, 48, 19, 34]}

 94%|█████████▍| 939999/1000000 [12:13:20<35:52, 27.88it/s]global step 940000, trans_decision ep_re 252.18174454590502

{"global_step": 940000, "eval_re": [101.13385588496219, 309.4843535653011, 
349.107436376828, 102.9270260846613, 418.0347220739371, 355.9442693354323, 
548.0270060571415, 95.0275981425538, 152.9250686295995, 89.20610930863332], 
"eval_len": [20, 55, 67, 20, 80, 64, 94, 19, 30, 18]}

 95%|█████████▍| 949999/1000000 [12:21:10<29:55, 27.85it/s]global step 950000, trans_decision ep_re 193.9787766359133

{"global_step": 950000, "eval_re": [94.75270482606035, 299.61052070932476, 
160.30394105419865, 138.60176136420148, 130.44940064076212, 242.95907769172305, 
524.1244421488838, 140.00110650199923, 113.18410291578641, 95.80070850619312], 
"eval_len": [19, 60, 31, 27, 25, 45, 98, 27, 22, 19]}

 96%|█████████▌| 959998/1000000 [12:29:00<23:09, 28.79it/s]global step 960000, trans_decision ep_re 248.78514098448318

{"global_step": 960000, "eval_re": [437.18870927439247, 196.6661764960733, 
96.04818506929992, 498.47721341911716, 123.55325802440726, 100.28293333965878, 
240.62735842729958, 591.460273234062, 102.2858606174705, 101.26144194305067], 
"eval_len": [85, 38, 19, 87, 24, 20, 46, 110, 20, 20]}

 97%|█████████▋| 969997/1000000 [12:36:50<17:52, 27.98it/s]global step 970000, trans_decision ep_re 236.42111380304374

{"global_step": 970000, "eval_re": [133.70797506376167, 83.78386966122302, 
340.49550328244004, 130.0571207115985, 229.1198052392544, 95.16210808719413, 
528.2374185207717, 283.54141649579947, 363.58443893712797, 176.52148203126643], 
"eval_len": [26, 17, 63, 25, 47, 19, 100, 55, 70, 34]}

 98%|█████████▊| 979997/1000000 [12:44:40<11:54, 27.99it/s]global step 980000, trans_decision ep_re 249.6628407105913

{"global_step": 980000, "eval_re": [385.43481822845655, 140.38769071622468, 
238.8339697565774, 102.19481743336809, 593.6957506001323, 89.91487247238176, 
96.44815532637129, 95.61338187762755, 298.5665607960162, 455.53838989875766], 
"eval_len": [75, 27, 49, 20, 112, 18, 19, 19, 57, 77]}

 99%|█████████▉| 989998/1000000 [12:52:21<05:52, 28.36it/s]global step 990000, trans_decision ep_re 252.30935386877422

{"global_step": 990000, "eval_re": [83.94193407588374, 411.7631646439208, 
395.4395745568564, 515.7575200873665, 107.40721789081084, 88.65503213476163, 
89.69046997564759, 295.34797795706316, 95.9415557952787, 439.14909157015296], 
"eval_len": [17, 83, 72, 104, 21, 18, 18, 56, 19, 81]}

100%|█████████▉| 999998/1000000 [13:00:13<00:00, 28.40it/s]global step 1000000, trans_decision ep_re 315.2370358267781

{"global_step": 1000000, "eval_re": [95.62922270065951, 407.78501183377114, 
574.1455862714934, 350.04386482140455, 575.9228811747589, 113.71597955520228, 
132.49797405063805, 371.86498078921437, 96.25442409934713, 434.5104329712915], 
"eval_len": [19, 74, 104, 68, 102, 22, 25, 70, 19, 94]}

100%|██████████| 1000000/1000000 [13:00:26<00:00, 21.36it/s]
