
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.15
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9999/1000000 [03:55<9:40:44, 28.41it/s]global step 10000, trans_decision ep_re 198.2450782129752

{"global_step": 10000, "eval_re": [125.16286889436725, 152.16135671507422, 
246.83827524470382, 371.21204310719327, 256.5992617611295, 97.12061154574587, 
102.84688109894792, 148.47534457032108, 118.91497289839499, 363.1191662938738], 
"eval_len": [24, 29, 47, 67, 48, 19, 20, 28, 23, 67]}

  2%|▏         | 19999/1000000 [11:41<9:43:16, 28.00it/s]global step 20000, trans_decision ep_re 224.56278594900428

{"global_step": 20000, "eval_re": [351.2997016879008, 114.08798426231172, 
108.52128657312387, 125.0180592441297, 412.9241572342494, 96.26589661790739, 
294.6250477275115, 293.06210120861414, 140.48648854237715, 309.33713639191734], 
"eval_len": [64, 22, 21, 24, 78, 19, 54, 54, 27, 56]}

  3%|▎         | 29997/1000000 [19:40<9:37:38, 27.99it/s]global step 30000, trans_decision ep_re 246.84548920939352

{"global_step": 30000, "eval_re": [396.3561760453811, 117.74975988978667, 
123.31300842438583, 318.4854402597777, 283.2399003620051, 347.43234603269667, 
102.08372993028951, 293.85275954876244, 361.9699323871395, 123.97183921371071], 
"eval_len": [73, 23, 24, 58, 55, 67, 20, 56, 66, 24]}

  4%|▍         | 39998/1000000 [27:30<9:21:12, 28.51it/s]global step 40000, trans_decision ep_re 207.56062045764733

{"global_step": 40000, "eval_re": [125.20967522016075, 317.3660701479409, 
345.41734824961014, 106.79656398526825, 287.85894462218005, 305.1478138990821, 
112.48445309872346, 112.82303458586311, 233.05547857228922, 129.44682219535545],
"eval_len": [24, 59, 63, 21, 54, 57, 22, 22, 46, 25]}

  5%|▍         | 49999/1000000 [35:04<9:22:58, 28.12it/s]global step 50000, trans_decision ep_re 310.16462057209696

{"global_step": 50000, "eval_re": [124.52100910007985, 234.32576137823958, 
762.0287299502049, 303.52192212119206, 111.87448935278107, 420.3402601373099, 
354.1554987357966, 367.9295083316596, 326.0973106825224, 96.85171593118373], 
"eval_len": [24, 43, 150, 58, 22, 78, 65, 68, 60, 19]}

  6%|▌         | 59997/1000000 [42:50<9:17:17, 28.11it/s]global step 60000, trans_decision ep_re 263.6732345865916

{"global_step": 60000, "eval_re": [289.61740856630996, 348.91765472333657, 
108.5917584083302, 130.08976762687723, 329.2457242631083, 313.6075938814995, 
357.33711808910385, 342.2512416827924, 304.1127398487397, 112.9613387758188], 
"eval_len": [54, 65, 21, 25, 63, 58, 67, 63, 59, 22]}

  7%|▋         | 69999/1000000 [50:36<9:06:04, 28.38it/s]global step 70000, trans_decision ep_re 246.95562901468423

{"global_step": 70000, "eval_re": [236.44526856850322, 285.2586017421439, 
345.727040453857, 434.7814901173606, 320.0844626254756, 96.26511507122018, 
96.67674106767086, 359.8516383603874, 158.22523764806098, 136.24069449216267], 
"eval_len": [48, 54, 62, 81, 59, 19, 19, 65, 30, 26]}

  8%|▊         | 79999/1000000 [58:24<9:07:35, 28.00it/s]global step 80000, trans_decision ep_re 266.80993730760474

{"global_step": 80000, "eval_re": [491.0104255706559, 315.60254148367125, 
333.6976143151156, 118.66096164407992, 305.7339514786554, 129.94197478270246, 
89.51796691227503, 304.27599463672124, 112.4017699785817, 467.2561722735889], 
"eval_len": [89, 58, 61, 23, 56, 25, 18, 55, 22, 85]}

  9%|▉         | 89998/1000000 [1:06:14<8:56:39, 28.26it/s]global step 90000, trans_decision ep_re 269.4017802537114

{"global_step": 90000, "eval_re": [327.40363438317905, 312.9547079016698, 
292.8390641205792, 119.54365595297709, 400.6886408216081, 135.4083865351132, 
102.84597248416006, 337.93239635865973, 314.88440125281244, 349.51694272635547],
"eval_len": [61, 58, 55, 23, 74, 26, 20, 63, 58, 67]}

 10%|▉         | 99997/1000000 [1:14:02<8:50:13, 28.29it/s]global step 100000, trans_decision ep_re 245.14328945292914

{"global_step": 100000, "eval_re": [106.68488345604592, 349.4634403938993, 
343.9324899427467, 361.68659351042305, 307.44441238586114, 316.7340908695555, 
135.7245268211631, 119.6424336373584, 124.62295103983655, 285.49707247240156], 
"eval_len": [21, 63, 63, 67, 56, 58, 26, 23, 24, 53]}

 11%|█         | 109997/1000000 [1:22:00<8:50:23, 27.97it/s]global step 110000, trans_decision ep_re 206.04141295929907

{"global_step": 110000, "eval_re": [286.37422920249895, 108.01746137994313, 
222.97618010720902, 106.15241793191933, 316.4559999162948, 113.50418335466777, 
341.2296104083695, 97.02364337923505, 111.30263770038357, 357.37776621246996], 
"eval_len": [52, 21, 44, 21, 58, 22, 64, 19, 22, 66]}

 12%|█▏        | 119997/1000000 [1:29:36<8:40:13, 28.19it/s]global step 120000, trans_decision ep_re 339.50224036054743

{"global_step": 120000, "eval_re": [107.40496766372785, 527.3602520434473, 
392.87245499395146, 333.74708475424103, 291.3592982980651, 313.33717989751466, 
380.32586139982044, 331.27593501301715, 453.5866777550269, 263.75269178666235], 
"eval_len": [21, 99, 73, 61, 54, 62, 70, 61, 86, 48]}

 13%|█▎        | 129999/1000000 [1:37:23<8:29:45, 28.45it/s]global step 130000, trans_decision ep_re 248.82151485643072

{"global_step": 130000, "eval_re": [290.2634891299053, 95.69539259556907, 
162.18550884807505, 398.5202847900125, 137.51701912797324, 284.77973362774867, 
108.47804598601243, 384.9984519090948, 312.99841707816466, 312.77880547175164], 
"eval_len": [54, 19, 31, 73, 27, 53, 21, 71, 58, 58]}

 14%|█▍        | 139998/1000000 [1:45:07<8:14:53, 28.96it/s]global step 140000, trans_decision ep_re 224.13287321383368

{"global_step": 140000, "eval_re": [297.2906861647857, 96.02640588662436, 
128.81890063219265, 124.5941591004404, 293.6535567428705, 118.1357843067559, 
581.9182301212365, 102.78454866847329, 163.56800012814136, 334.5384603868159], 
"eval_len": [54, 19, 25, 24, 55, 23, 111, 20, 31, 65]}

 15%|█▍        | 149997/1000000 [1:53:00<8:10:50, 28.86it/s]global step 150000, trans_decision ep_re 236.0534744603524

{"global_step": 150000, "eval_re": [323.2400579492483, 102.38272753950118, 
277.11886308292543, 311.21110785648506, 428.3550096169034, 89.93628524486832, 
168.74251679425743, 263.1639378823879, 140.394820091492, 255.98941854545518], 
"eval_len": [60, 20, 52, 56, 87, 18, 32, 50, 27, 47]}

 16%|█▌        | 159999/1000000 [2:00:31<8:14:44, 28.30it/s]global step 160000, trans_decision ep_re 262.0308634259765

{"global_step": 160000, "eval_re": [415.1633616527793, 101.37533530358147, 
302.1571033248367, 341.49930060782566, 321.5874832541903, 273.87374568800794, 
357.66460507162907, 274.2587890271217, 125.38705992787297, 107.34185040191986], 
"eval_len": [77, 20, 56, 62, 59, 52, 65, 50, 24, 21]}

 17%|█▋        | 169998/1000000 [2:08:15<7:55:52, 29.07it/s]global step 170000, trans_decision ep_re 225.23793598866905

{"global_step": 170000, "eval_re": [145.64865766941935, 327.6187709351717, 
435.6513454349427, 107.97554202903451, 310.0266843504982, 146.96515280378765, 
143.20705205241805, 106.51813961690988, 173.24533425935232, 355.5226807351564], 
"eval_len": [28, 60, 79, 21, 58, 28, 27, 21, 36, 64]}

 18%|█▊        | 179999/1000000 [2:16:10<8:00:38, 28.43it/s]global step 180000, trans_decision ep_re 274.4041820492522

{"global_step": 180000, "eval_re": [111.32587092651944, 416.586053983523, 
101.67492048078046, 330.3042992782261, 413.61903545961854, 125.62666522860246, 
351.87268849690224, 328.90453425568415, 129.21397938773984, 434.9137729949256], 
"eval_len": [22, 76, 20, 62, 75, 24, 66, 62, 25, 85]}

 19%|█▉        | 189997/1000000 [2:23:42<7:55:34, 28.39it/s]global step 190000, trans_decision ep_re 225.34278470202685

{"global_step": 190000, "eval_re": [107.45103955094872, 291.40819253398587, 
343.38630997154746, 337.36665969276186, 279.2400453298317, 113.1956796736107, 
297.13659442050107, 279.3041175640241, 114.64634651226872, 90.29286177078792], 
"eval_len": [21, 54, 63, 64, 51, 22, 54, 52, 22, 18]}

 20%|█▉        | 199997/1000000 [2:31:40<7:50:55, 28.31it/s]global step 200000, trans_decision ep_re 318.39725942876703

{"global_step": 200000, "eval_re": [338.4226059589228, 350.86260033434417, 
306.9491474010836, 627.5126490674651, 294.3237580403354, 376.45824768956095, 
118.71357326933668, 315.542103573792, 168.85084785936522, 286.3370610934639], 
"eval_len": [62, 64, 58, 120, 53, 74, 23, 58, 32, 53]}

 21%|██        | 209997/1000000 [2:39:12<7:46:26, 28.23it/s]global step 210000, trans_decision ep_re 253.5286184462228

{"global_step": 210000, "eval_re": [166.16230937547996, 129.89500241543954, 
119.63963842767475, 355.0380143112337, 96.14298169099969, 450.02983314583736, 
400.7465452973196, 96.78837953670191, 385.48483158273893, 335.35864867880275], 
"eval_len": [32, 25, 23, 66, 19, 86, 73, 19, 70, 61]}

 22%|██▏       | 219998/1000000 [2:46:56<7:32:06, 28.75it/s]global step 220000, trans_decision ep_re 202.82144354584312

{"global_step": 220000, "eval_re": [285.94505072305464, 143.14298209169945, 
96.43922793171281, 118.92201959843258, 254.1214532084675, 231.0800248787288, 
324.45260869919855, 169.3123459774605, 290.84747573414995, 113.95124661552623], 
"eval_len": [52, 28, 19, 23, 48, 45, 59, 32, 55, 22]}

 23%|██▎       | 229998/1000000 [2:54:50<7:28:03, 28.64it/s]global step 230000, trans_decision ep_re 208.14697918112378

{"global_step": 230000, "eval_re": [112.26738697201367, 305.6806430659901, 
294.1071668473061, 113.84563611417538, 186.6358543146297, 308.0311324156733, 
101.30610645571078, 257.5892727739231, 89.85735293421216, 312.14923991760355], 
"eval_len": [22, 59, 54, 22, 40, 57, 20, 47, 18, 59]}

 24%|██▍       | 239998/1000000 [3:02:23<7:24:15, 28.51it/s]global step 240000, trans_decision ep_re 278.48086361231327

{"global_step": 240000, "eval_re": [272.7410343902897, 341.3721428431757, 
372.000474521841, 101.44288091140035, 107.71900969841604, 212.99591863232888, 
368.0008288993517, 399.7736848830922, 310.4582772136372, 298.30438412959984], 
"eval_len": [50, 61, 69, 20, 21, 43, 68, 73, 57, 56]}

 25%|██▍       | 249998/1000000 [3:10:20<7:20:02, 28.41it/s]global step 250000, trans_decision ep_re 327.77434635197574

{"global_step": 250000, "eval_re": [359.8573073918931, 441.6295093973477, 
414.25450331104935, 246.71092433444792, 308.2825630784411, 257.93084633291477, 
250.00677705620282, 393.9915271001255, 344.41008583269587, 260.6694196846394], 
"eval_len": [66, 80, 76, 46, 57, 48, 49, 73, 64, 49]}

 26%|██▌       | 259997/1000000 [3:17:51<7:16:50, 28.23it/s]global step 260000, trans_decision ep_re 345.99795011529585

{"global_step": 260000, "eval_re": [311.75411014617015, 430.12818555067577, 
371.1454562844574, 438.7828614843638, 294.38306907129294, 289.37289309451916, 
332.61666743709173, 425.94720025212075, 291.99787203511704, 273.85118579715004],
"eval_len": [59, 79, 69, 80, 54, 54, 60, 80, 55, 53]}

 27%|██▋       | 269999/1000000 [3:25:36<7:07:02, 28.49it/s]global step 270000, trans_decision ep_re 174.79260610849468

{"global_step": 270000, "eval_re": [273.72371557957706, 89.64353277180048, 
247.09707398557643, 322.88080170972347, 300.2714640456749, 113.42187552308613, 
96.33612320820959, 91.00153048073527, 89.39072437832283, 124.15921940224074], 
"eval_len": [51, 18, 46, 59, 57, 22, 19, 18, 18, 24]}

 28%|██▊       | 279998/1000000 [3:33:20<6:59:26, 28.61it/s]global step 280000, trans_decision ep_re 251.8463827322333

{"global_step": 280000, "eval_re": [368.37258259992086, 102.38655151488936, 
307.4650837564753, 162.02614496964173, 304.3380915836445, 95.7773570992971, 
215.90338892137603, 266.07786430723786, 414.93864367469473, 281.1781188951558], 
"eval_len": [70, 20, 56, 31, 56, 19, 42, 50, 76, 52]}

 29%|██▉       | 289999/1000000 [3:41:05<6:56:35, 28.40it/s]global step 290000, trans_decision ep_re 227.65295998939933

{"global_step": 290000, "eval_re": [91.11298399314694, 123.65310858244682, 
334.27442087231293, 341.012084386668, 278.4335227443962, 397.89606770036903, 
305.0334433004675, 102.17548212182552, 195.24377635937935, 107.69470983298078], 
"eval_len": [18, 24, 62, 64, 51, 74, 59, 20, 37, 21]}

 30%|██▉       | 299999/1000000 [3:48:52<6:55:33, 28.07it/s]global step 300000, trans_decision ep_re 255.51693824019503

{"global_step": 300000, "eval_re": [134.40552466990914, 445.99292335774294, 
323.60575569632965, 108.0111226682052, 114.33284096223365, 257.0029607540758, 
146.4302754263633, 297.4824301333447, 406.7792295439212, 321.1263191898249], 
"eval_len": [26, 84, 60, 21, 22, 48, 28, 56, 74, 59]}

 31%|███       | 309998/1000000 [3:56:50<6:37:40, 28.92it/s]global step 310000, trans_decision ep_re 253.92477491221484

{"global_step": 310000, "eval_re": [302.0464597903588, 113.16474257918188, 
131.25180537540368, 341.2130652801904, 262.18603295935054, 315.32651061957233, 
376.3171267200393, 119.25577774782096, 180.30448931753116, 398.1817387326995], 
"eval_len": [55, 22, 25, 62, 49, 58, 70, 23, 34, 74]}

 32%|███▏      | 319997/1000000 [4:04:22<6:38:36, 28.43it/s]global step 320000, trans_decision ep_re 244.4325551501719

{"global_step": 320000, "eval_re": [118.63637235897521, 357.25008852012854, 
267.3945832883194, 119.22903582266281, 363.7962883050705, 102.12830638636653, 
401.2402496778951, 338.0828184539988, 246.74493640392322, 129.82287228437906], 
"eval_len": [23, 65, 51, 23, 70, 20, 76, 62, 47, 25]}

 33%|███▎      | 329997/1000000 [4:12:05<6:33:12, 28.40it/s]global step 330000, trans_decision ep_re 327.55306413752413

{"global_step": 330000, "eval_re": [355.63171393475255, 336.51995627293644, 
323.07439413548366, 300.03408423048296, 393.2383834422218, 112.8320741993644, 
339.15406544145765, 494.41773333882213, 306.11867296636063, 314.50956341335944],
"eval_len": [65, 61, 60, 56, 72, 22, 63, 104, 56, 56]}

 34%|███▍      | 339997/1000000 [4:20:00<6:30:06, 28.20it/s]global step 340000, trans_decision ep_re 258.2318821491127

{"global_step": 340000, "eval_re": [141.49334799121786, 337.1154433814886, 
514.1367833318023, 282.8898410149104, 330.62058809120623, 116.78734562066319, 
340.397090274354, 315.42756017246285, 101.9878699097183, 101.46295170330373], 
"eval_len": [27, 63, 94, 53, 60, 23, 63, 57, 20, 20]}

 35%|███▍      | 349997/1000000 [4:27:35<6:24:36, 28.17it/s]global step 350000, trans_decision ep_re 275.1629361809046

{"global_step": 350000, "eval_re": [156.9720978902331, 112.96837238709635, 
382.07196401414177, 125.23425851630843, 251.70483352937137, 392.41826073238127, 
328.2086096732735, 275.81328864790487, 313.172183064186, 413.065493354149], 
"eval_len": [30, 22, 75, 24, 48, 71, 60, 52, 57, 75]}

 36%|███▌      | 359998/1000000 [4:35:22<6:10:57, 28.75it/s]global step 360000, trans_decision ep_re 281.5084436568814

{"global_step": 360000, "eval_re": [321.1393373220135, 370.9456336435818, 
324.28823652565893, 121.40474526175211, 307.00981565275777, 293.73484937124795, 
342.34954394493604, 328.80713267081865, 310.22850237888326, 95.17663979716396], 
"eval_len": [58, 69, 59, 24, 57, 54, 62, 60, 58, 19]}

 37%|███▋      | 369997/1000000 [4:43:06<6:10:11, 28.36it/s]global step 370000, trans_decision ep_re 312.9458043078481

{"global_step": 370000, "eval_re": [294.49539422636906, 298.06817638593094, 
421.841509050342, 308.9183520096981, 124.8678677900216, 146.93398099839533, 
399.1150982245193, 406.3580076432752, 349.9064244310694, 378.9532323188598], 
"eval_len": [54, 57, 76, 56, 24, 28, 73, 74, 69, 68]}

 38%|███▊      | 379997/1000000 [4:50:50<6:02:22, 28.52it/s]global step 380000, trans_decision ep_re 288.5412964877883

{"global_step": 380000, "eval_re": [343.054207356606, 95.3628187210591, 
338.31444812119713, 380.07775812490263, 321.96984143289393, 335.0621431220623, 
301.4069742085493, 129.79545337198883, 123.27113641519249, 517.0981840034314], 
"eval_len": [63, 19, 62, 69, 58, 68, 57, 25, 24, 100]}

 39%|███▉      | 389998/1000000 [4:58:35<5:56:12, 28.54it/s]global step 390000, trans_decision ep_re 275.67481146340583

{"global_step": 390000, "eval_re": [102.08634384694284, 455.99767989837403, 
287.3438790583167, 119.66597607689046, 112.63421839155497, 454.64338580474794, 
315.71971546397106, 478.6962472837172, 307.0939323859093, 122.86673642363391], 
"eval_len": [20, 84, 57, 23, 22, 94, 58, 88, 57, 24]}

 40%|███▉      | 399999/1000000 [5:06:30<5:50:30, 28.53it/s]global step 400000, trans_decision ep_re 206.26179902794948

{"global_step": 400000, "eval_re": [135.94025305418086, 224.438893625183, 
146.47966975974558, 108.21169446450708, 387.03701780526626, 123.01363087200686, 
297.9090772336607, 125.60228927250989, 406.69185675890117, 107.29360743353321], 
"eval_len": [26, 47, 28, 21, 71, 24, 56, 24, 74, 21]}

 41%|████      | 409997/1000000 [5:14:10<5:43:45, 28.61it/s]global step 410000, trans_decision ep_re 284.45597831119215

{"global_step": 410000, "eval_re": [122.48850160469445, 402.196616069473, 
399.8397175760285, 359.26170689939795, 433.5818432327162, 107.51763387808579, 
302.33173133825755, 502.259528366082, 106.69596764731261, 108.38653649987353], 
"eval_len": [24, 73, 72, 68, 81, 21, 57, 93, 21, 21]}

 42%|████▏     | 419999/1000000 [5:21:40<5:39:51, 28.44it/s]global step 420000, trans_decision ep_re 253.76581057757653

{"global_step": 420000, "eval_re": [398.2151108442256, 189.93293313423118, 
130.37504558875483, 227.327036909967, 308.95557987350463, 264.5980562572481, 
339.8395095281713, 264.86821919545264, 113.81158475526803, 299.7350296889422], 
"eval_len": [72, 36, 25, 43, 56, 50, 62, 50, 22, 63]}

 43%|████▎     | 429998/1000000 [5:29:21<5:27:42, 28.99it/s]global step 430000, trans_decision ep_re 241.76043594304298

{"global_step": 430000, "eval_re": [318.327847758136, 117.33988639991625, 
272.1600990899913, 124.46755324979715, 391.479929351715, 89.98767000229884, 
286.45848618192247, 327.27427054841735, 377.75014609580694, 112.35847075242842],
"eval_len": [59, 23, 51, 24, 71, 18, 55, 60, 70, 22]}

 44%|████▍     | 439999/1000000 [5:37:02<5:24:39, 28.75it/s]global step 440000, trans_decision ep_re 306.0969746514845

{"global_step": 440000, "eval_re": [119.44590658828005, 389.6238004339801, 
457.0365923443411, 119.18849319716536, 357.2497191575867, 305.57773647537664, 
166.77879936745114, 534.7482218861982, 296.2790943561637, 315.041382708302], 
"eval_len": [23, 70, 84, 23, 66, 55, 32, 99, 54, 58]}

 45%|████▍     | 449999/1000000 [5:44:44<5:19:58, 28.65it/s]global step 450000, trans_decision ep_re 294.782416981159

{"global_step": 450000, "eval_re": [106.72798945724097, 304.5447939839537, 
350.388574909999, 337.05578968441785, 334.1695329890837, 118.85787948746217, 
479.269552321558, 95.00208890872268, 357.8512119581575, 463.95675611099426], 
"eval_len": [21, 55, 65, 62, 61, 23, 91, 19, 65, 86]}

 46%|████▌     | 459998/1000000 [5:52:26<5:08:29, 29.17it/s]global step 460000, trans_decision ep_re 264.7672799081335

{"global_step": 460000, "eval_re": [342.01263964989585, 479.598038345326, 
339.19538048312364, 135.41446763584426, 114.3317755467047, 102.43537765881793, 
133.1568528658579, 276.0249713246449, 350.475437576781, 375.027857994339], 
"eval_len": [61, 89, 64, 26, 22, 20, 26, 51, 66, 68]}

 47%|████▋     | 469999/1000000 [6:00:20<5:12:09, 28.30it/s]global step 470000, trans_decision ep_re 291.26415064427727

{"global_step": 470000, "eval_re": [321.98393760197166, 398.83608742613075, 
388.0449743098193, 152.9014027748019, 131.43588821448438, 284.0004770803351, 
256.1401817932044, 124.44290010778026, 483.1725242811296, 371.6831328531157], 
"eval_len": [60, 73, 72, 29, 25, 52, 49, 24, 90, 70]}

 48%|████▊     | 479999/1000000 [6:07:53<5:03:20, 28.57it/s]global step 480000, trans_decision ep_re 274.9021552583449

{"global_step": 480000, "eval_re": [478.5553630943717, 112.36307513920204, 
90.05496703655555, 101.18095030762366, 275.0106366953921, 325.88046253785666, 
325.5848745727922, 353.5108135761813, 324.7185223117267, 362.16188731174725], 
"eval_len": [88, 22, 18, 20, 50, 60, 59, 64, 60, 67]}

 49%|████▉     | 489999/1000000 [6:15:50<4:56:58, 28.62it/s]global step 490000, trans_decision ep_re 328.66867597207636

{"global_step": 490000, "eval_re": [498.4534939414764, 362.0441899928078, 
340.59259018726885, 90.11998083155243, 326.88019379774306, 372.6002329297886, 
333.0135400417382, 95.9034104138902, 488.9377744292231, 378.14135315527443], 
"eval_len": [94, 65, 61, 18, 61, 68, 61, 19, 92, 68]}

 50%|████▉     | 499999/1000000 [6:23:22<4:54:27, 28.30it/s]global step 500000, trans_decision ep_re 311.7323495571278

{"global_step": 500000, "eval_re": [112.9707503715156, 462.11540179212443, 
306.97762479242635, 108.62044411062422, 102.79965848889154, 467.1950030951971, 
410.5139987202311, 407.56494792847167, 364.4149666375632, 374.15069963423315], 
"eval_len": [22, 87, 56, 21, 20, 91, 74, 76, 65, 68]}

 51%|█████     | 509999/1000000 [6:31:20<4:48:39, 28.29it/s]global step 510000, trans_decision ep_re 233.3619921933111

{"global_step": 510000, "eval_re": [331.8226208788883, 135.3329263003637, 
333.75531896588933, 181.07691989448102, 410.067025007409, 107.95143726280116, 
124.68427225391085, 302.34074769460483, 298.4425238539814, 108.1461298207813], 
"eval_len": [61, 26, 61, 34, 78, 21, 24, 56, 55, 21]}

 52%|█████▏    | 519997/1000000 [6:38:52<4:40:23, 28.53it/s]global step 520000, trans_decision ep_re 260.161243554755

{"global_step": 520000, "eval_re": [320.69335782856194, 305.4392980910954, 
333.90953645563013, 113.67472167564429, 336.9002093064965, 141.596568946613, 
280.853198433598, 141.2436450790906, 343.82837415228175, 283.473525578538], 
"eval_len": [59, 58, 62, 22, 61, 27, 52, 27, 62, 53]}

 53%|█████▎    | 529998/1000000 [6:46:36<4:31:05, 28.90it/s]global step 530000, trans_decision ep_re 251.0799128138928

{"global_step": 530000, "eval_re": [106.92434860451652, 129.9025808417081, 
470.1120390650263, 108.78208472123129, 341.45919981179463, 242.15973218204738, 
302.2243788660564, 113.25581086043215, 341.76385990289896, 354.21509328321633], 
"eval_len": [21, 25, 88, 21, 65, 46, 56, 22, 62, 64]}

 54%|█████▍    | 539997/1000000 [6:54:20<4:27:08, 28.70it/s]global step 540000, trans_decision ep_re 348.1522250646601

{"global_step": 540000, "eval_re": [281.37953439238504, 251.4683745519387, 
288.3394854281946, 819.5574491348924, 392.490008501852, 101.65220819948765, 
102.29851290182512, 335.640091933796, 501.1451041876463, 407.5514814145831], 
"eval_len": [52, 48, 52, 156, 71, 20, 20, 63, 98, 74]}

 55%|█████▍    | 549999/1000000 [7:02:04<4:20:54, 28.75it/s]global step 550000, trans_decision ep_re 273.0391486694747

{"global_step": 550000, "eval_re": [346.32530680111677, 356.914934099377, 
107.11451935888181, 108.50794549470793, 315.8921323648757, 102.59605148120025, 
259.17984895709293, 394.06261387512734, 398.66656171943544, 341.131572542932], 
"eval_len": [64, 66, 21, 21, 59, 20, 49, 72, 76, 62]}

 56%|█████▌    | 559999/1000000 [7:10:00<4:17:47, 28.45it/s]global step 560000, trans_decision ep_re 300.68775650903996

{"global_step": 560000, "eval_re": [473.03339714892974, 377.73311210953483, 
95.65979577446244, 141.80859003436603, 302.46571732332103, 590.344064382776, 
342.1623086205008, 108.19857107662997, 288.01111168141387, 287.4608969384649], 
"eval_len": [94, 71, 19, 27, 55, 110, 67, 21, 54, 51]}

 57%|█████▋    | 569997/1000000 [7:17:31<4:08:44, 28.81it/s]global step 570000, trans_decision ep_re 276.39046367649223

{"global_step": 570000, "eval_re": [335.29724257370395, 118.6796760444658, 
147.3707115608221, 280.7812622964853, 342.3012105631693, 360.16624633347453, 
186.04851279227813, 119.35050806245857, 609.654176429631, 264.2550901084338], 
"eval_len": [64, 23, 28, 52, 65, 66, 35, 23, 108, 53]}

 58%|█████▊    | 579999/1000000 [7:25:15<4:03:14, 28.78it/s]global step 580000, trans_decision ep_re 243.66635055029096

{"global_step": 580000, "eval_re": [347.127631203512, 350.8013590936804, 
260.14071465007737, 153.5654387727437, 106.90101074445779, 360.0191444022063, 
305.24474426594037, 162.97067392509533, 276.4868065875644, 113.4059818576319], 
"eval_len": [63, 63, 48, 29, 21, 67, 55, 31, 51, 22]}

 59%|█████▉    | 589998/1000000 [7:33:10<3:57:45, 28.74it/s]global step 590000, trans_decision ep_re 260.414959356207

{"global_step": 590000, "eval_re": [288.21591551097515, 168.74615811389134, 
445.34716996777735, 101.62204580744641, 509.5329726264738, 108.2884034948275, 
271.01588272354985, 113.47519269574278, 351.22490866251127, 246.68094395887456],
"eval_len": [52, 32, 79, 20, 96, 21, 52, 22, 65, 47]}

 60%|█████▉    | 599999/1000000 [7:40:43<3:55:06, 28.36it/s]global step 600000, trans_decision ep_re 264.5395068084768

{"global_step": 600000, "eval_re": [112.05281349623664, 413.3583942685656, 
263.47581267716794, 341.69785264843864, 141.10879825645765, 317.80935859048407, 
407.881151459762, 409.05572208368125, 108.36004210559977, 130.59512249837414], 
"eval_len": [22, 80, 51, 61, 27, 59, 75, 77, 21, 25]}

 61%|██████    | 609999/1000000 [7:48:40<3:47:02, 28.63it/s]global step 610000, trans_decision ep_re 322.1706869809044

{"global_step": 610000, "eval_re": [378.64518721993727, 320.20071465292506, 
362.744458983403, 322.9519207091543, 146.7169511402459, 305.2724467034756, 
516.3576140381167, 374.82341286325567, 112.99878010491723, 380.99538339361317], 
"eval_len": [71, 60, 69, 59, 28, 55, 99, 67, 22, 70]}

 62%|██████▏   | 619999/1000000 [7:56:13<3:39:49, 28.81it/s]global step 620000, trans_decision ep_re 277.8389051784999

{"global_step": 620000, "eval_re": [394.65374169343545, 418.1923725675742, 
379.38044097634855, 310.92631122767136, 102.18259636182913, 332.6487743381188, 
101.85267342449949, 108.1129214058305, 315.6040650165775, 314.8351547731144], 
"eval_len": [72, 76, 69, 56, 20, 62, 20, 21, 58, 61]}

 63%|██████▎   | 629998/1000000 [8:03:56<3:33:03, 28.94it/s]global step 630000, trans_decision ep_re 331.054012319017

{"global_step": 630000, "eval_re": [432.4051467809459, 395.34743786486166, 
298.9778851750991, 187.78803990416282, 387.3350519673232, 469.3585311554697, 
323.2845555999877, 90.94074297890424, 314.3495520235133, 410.7531797399022], 
"eval_len": [78, 72, 54, 36, 70, 89, 60, 18, 57, 74]}

 64%|██████▍   | 639999/1000000 [8:11:50<3:30:07, 28.56it/s]global step 640000, trans_decision ep_re 253.30522738604623

{"global_step": 640000, "eval_re": [95.95153086907449, 347.0634441670231, 
334.7062123479311, 102.4555016367017, 158.38954074478224, 306.10895181087136, 
341.6574701495256, 363.03349601197084, 364.3276769966753, 119.3584491259068], 
"eval_len": [19, 66, 61, 20, 30, 56, 63, 65, 67, 23]}

 65%|██████▍   | 649997/1000000 [8:19:22<3:24:05, 28.58it/s]global step 650000, trans_decision ep_re 254.98656828781105

{"global_step": 650000, "eval_re": [173.05257167856286, 409.83081641159237, 
130.89142101435894, 337.0199125609426, 439.93641650769933, 101.6224005231816, 
107.09700220314663, 405.72880980893194, 336.4989153709176, 108.18741679877611], 
"eval_len": [33, 74, 25, 62, 78, 20, 21, 72, 65, 21]}

 66%|██████▌   | 659999/1000000 [8:27:06<3:17:35, 28.68it/s]global step 660000, trans_decision ep_re 295.4113538395583

{"global_step": 660000, "eval_re": [107.50634859862024, 544.2222563060632, 
108.70377949867549, 703.7767995225853, 96.36693000561155, 289.9454970210503, 
114.05564868599883, 300.3648099642837, 374.40583386071313, 314.765634931981], 
"eval_len": [21, 99, 21, 131, 19, 53, 22, 55, 75, 58]}

 67%|██████▋   | 669999/1000000 [8:34:50<3:14:19, 28.30it/s]global step 670000, trans_decision ep_re 383.8557981934735

{"global_step": 670000, "eval_re": [530.9518499645426, 303.1163428951817, 
542.7280423175738, 664.1918192301847, 324.24962878657396, 156.92004817734056, 
401.4840911820034, 366.96582196977556, 440.4089333913669, 107.5414040201922], 
"eval_len": [99, 57, 101, 117, 60, 30, 71, 68, 79, 21]}

 68%|██████▊   | 679998/1000000 [8:42:35<3:04:27, 28.91it/s]global step 680000, trans_decision ep_re 309.4444000199707

{"global_step": 680000, "eval_re": [112.42444911705647, 331.33240102012377, 
135.46949984770112, 501.0540267239437, 118.55841342594576, 95.90136230990359, 
433.45407736393497, 764.4974819777483, 511.7868158053939, 89.9654726079556], 
"eval_len": [22, 60, 26, 89, 23, 19, 78, 144, 92, 18]}

 69%|██████▉   | 689997/1000000 [8:50:30<2:59:50, 28.73it/s]global step 690000, trans_decision ep_re 327.5155629725499

{"global_step": 690000, "eval_re": [266.40674418619744, 291.59050500471784, 
196.6904595921417, 139.53354744710495, 790.1257451369212, 376.78137471292854, 
385.45713184871124, 248.26778141034308, 278.4348714712724, 301.86746891516094], 
"eval_len": [49, 54, 37, 27, 146, 67, 70, 47, 52, 56]}

 70%|██████▉   | 699999/1000000 [8:58:02<2:55:38, 28.47it/s]global step 700000, trans_decision ep_re 242.41885104551943

{"global_step": 700000, "eval_re": [124.00473178373717, 292.928680348753, 
131.59247409430264, 515.9934055994512, 103.20412639908616, 330.17456448846826, 
118.67726498609233, 101.29699105183651, 359.64350242593815, 346.672769277529], 
"eval_len": [24, 53, 25, 94, 20, 60, 23, 20, 65, 64]}

 71%|███████   | 709998/1000000 [9:05:45<2:47:52, 28.79it/s]global step 710000, trans_decision ep_re 251.4845091021335

{"global_step": 710000, "eval_re": [111.62273208376727, 96.46259128695344, 
158.01153126359142, 114.07423037924218, 303.05855767631203, 357.02750819731096, 
400.6342746297088, 440.5939054610683, 310.1524016478709, 223.20735839550994], 
"eval_len": [22, 19, 30, 22, 56, 64, 71, 81, 56, 45]}

 72%|███████▏  | 719997/1000000 [9:13:40<2:44:15, 28.41it/s]global step 720000, trans_decision ep_re 232.85660905018312

{"global_step": 720000, "eval_re": [526.5356475754892, 130.8691500989047, 
595.5929923004435, 124.5389501671033, 96.41933970330484, 128.69412919585406, 
106.96337352648901, 141.24596139966638, 336.8418225651311, 140.86472396944504], 
"eval_len": [99, 25, 121, 24, 19, 25, 21, 27, 61, 27]}

 73%|███████▎  | 729997/1000000 [9:21:13<2:36:32, 28.75it/s]global step 730000, trans_decision ep_re 297.3017508563834

{"global_step": 730000, "eval_re": [197.41750828938433, 157.64202116023563, 
274.1538911915418, 354.7502366298112, 310.8870270385123, 356.6497116115905, 
285.8927513915725, 343.32017271911735, 376.1515501985899, 316.15263833347905], 
"eval_len": [37, 30, 50, 65, 57, 65, 52, 64, 69, 57]}

 74%|███████▍  | 739999/1000000 [9:28:56<2:32:06, 28.49it/s]global step 740000, trans_decision ep_re 243.7769398118881

{"global_step": 740000, "eval_re": [343.9530896400001, 90.89903522211894, 
325.00735097198003, 380.78327488297657, 178.05242760308337, 119.88800552828904, 
102.46602905365661, 420.6708335542339, 108.20122685939242, 367.84812480314986], 
"eval_len": [65, 18, 61, 68, 34, 23, 20, 75, 21, 68]}

 75%|███████▍  | 749998/1000000 [9:36:40<2:24:25, 28.85it/s]global step 750000, trans_decision ep_re 322.07177277524335

{"global_step": 750000, "eval_re": [758.6019391937541, 167.26341660825275, 
96.67618607121213, 332.4580341508807, 259.342906748564, 166.70206715096504, 
112.59793945420772, 793.0273084146226, 399.4879955982505, 134.55993436172392], 
"eval_len": [147, 32, 19, 59, 50, 32, 22, 147, 71, 26]}

 76%|███████▌  | 759997/1000000 [9:44:24<2:19:09, 28.74it/s]global step 760000, trans_decision ep_re 336.46950228438385

{"global_step": 760000, "eval_re": [370.0716157758484, 196.55897118816773, 
466.39560084937733, 313.37978503872444, 480.41331166903194, 285.9629567673727, 
119.18375068624908, 442.2947483742846, 388.9721303095348, 301.46215218524713], 
"eval_len": [67, 37, 82, 57, 88, 53, 23, 82, 69, 58]}

 77%|███████▋  | 769998/1000000 [9:52:20<2:13:45, 28.66it/s]global step 770000, trans_decision ep_re 361.22002140846456

{"global_step": 770000, "eval_re": [96.65007544022434, 502.72554342362923, 
480.9433336787947, 500.4115300440319, 96.29311767028662, 389.46038147069174, 
475.58545956150624, 334.08967384048515, 432.8795288701024, 303.16157008489375], 
"eval_len": [19, 88, 90, 89, 19, 72, 86, 61, 77, 59]}

 78%|███████▊  | 779999/1000000 [9:59:51<2:09:11, 28.38it/s]global step 780000, trans_decision ep_re 339.59911051902236

{"global_step": 780000, "eval_re": [354.80361828066754, 381.02026090671376, 
114.03794648592798, 326.7242913666091, 392.61644242010436, 344.79602476431285, 
382.09210007784304, 342.039854744192, 406.63074208301543, 351.22982406083764], 
"eval_len": [66, 68, 22, 59, 68, 61, 70, 64, 77, 62]}

 79%|███████▉  | 789998/1000000 [10:07:34<2:01:37, 28.78it/s]global step 790000, trans_decision ep_re 338.4984593903274

{"global_step": 790000, "eval_re": [287.9247856883271, 100.65484999841878, 
392.34499668493993, 95.59160833640011, 310.7280737816037, 578.2191141587762, 
603.2282047077172, 287.0835667205283, 157.48659342523598, 571.7228004013261], 
"eval_len": [58, 20, 71, 19, 58, 106, 114, 54, 30, 101]}

 80%|███████▉  | 799997/1000000 [10:15:17<1:57:46, 28.30it/s]global step 800000, trans_decision ep_re 278.10879305124456

{"global_step": 800000, "eval_re": [465.9962612975181, 135.08376795866286, 
102.55954703331808, 400.40923079046667, 307.64781901817554, 166.8965536744911, 
155.736419979477, 161.50344345461355, 512.4701950328714, 372.7846922728516], 
"eval_len": [81, 26, 20, 72, 56, 32, 30, 31, 92, 69]}

 81%|████████  | 809999/1000000 [10:23:10<1:50:35, 28.63it/s]global step 810000, trans_decision ep_re 253.89891411136176

{"global_step": 810000, "eval_re": [298.7996746745463, 101.84535411065002, 
314.77815401500146, 132.92015850306697, 605.0537767939742, 176.7069928869757, 
355.7497495599886, 139.0483227288674, 311.59770301661143, 102.4892548239361], 
"eval_len": [56, 20, 59, 26, 110, 34, 63, 27, 57, 20]}

 82%|████████▏ | 819997/1000000 [10:30:43<1:44:12, 28.79it/s]global step 820000, trans_decision ep_re 423.6735880372289

{"global_step": 820000, "eval_re": [102.46704670486412, 334.6176349182511, 
329.7256215014484, 434.70749474308263, 475.30595726759765, 258.57537400893017, 
445.09236127450214, 295.7762071713523, 449.9114359759695, 1110.556746806291], 
"eval_len": [20, 60, 61, 79, 86, 52, 78, 55, 83, 220]}

 83%|████████▎ | 829999/1000000 [10:38:40<1:39:26, 28.49it/s]global step 830000, trans_decision ep_re 291.4335054313253

{"global_step": 830000, "eval_re": [905.0426834705894, 436.1934007880229, 
106.96591632875028, 102.45299761777258, 335.2943436860877, 114.30202227393211, 
310.1558430747977, 372.3596088275141, 130.3564807844192, 101.21175746136723], 
"eval_len": [169, 78, 21, 20, 61, 22, 60, 69, 25, 20]}

 84%|████████▍ | 839999/1000000 [10:46:11<1:33:34, 28.50it/s]global step 840000, trans_decision ep_re 292.9225177896807

{"global_step": 840000, "eval_re": [102.70477560129068, 350.74342242825645, 
306.1325305628694, 330.60468814327265, 129.1214824763043, 365.2828556859526, 
333.6108519547585, 760.7782225722324, 120.29428312934412, 129.95206534252554], 
"eval_len": [20, 64, 56, 58, 25, 68, 61, 142, 23, 25]}

 85%|████████▍ | 849997/1000000 [10:53:54<1:27:39, 28.52it/s]global step 850000, trans_decision ep_re 258.13976585669866

{"global_step": 850000, "eval_re": [107.38776565745424, 211.31788320785535, 
102.1980549353976, 123.51197561293401, 323.0438913514414, 389.963824568918, 
249.96615741116605, 106.94728327998078, 427.65385474394975, 539.4069677978895], 
"eval_len": [21, 40, 20, 24, 59, 74, 49, 21, 77, 98]}

 86%|████████▌ | 859997/1000000 [11:01:50<1:22:35, 28.25it/s]global step 860000, trans_decision ep_re 327.0620699694153

{"global_step": 860000, "eval_re": [522.0569311572782, 147.75045823980784, 
705.3876348350635, 129.91772008254458, 621.2989827537685, 117.90563440677528, 
122.4582659994629, 136.66798161235744, 585.0959727250115, 182.08111788208276], 
"eval_len": [94, 28, 124, 25, 110, 23, 24, 26, 108, 35]}

 87%|████████▋ | 869997/1000000 [11:09:21<1:16:10, 28.45it/s]global step 870000, trans_decision ep_re 275.61617732892074

{"global_step": 870000, "eval_re": [486.74255432438696, 423.8421081961285, 
136.57010724198534, 305.0809737669158, 113.93088428279846, 180.4796914229298, 
216.4688499747541, 397.2603123592503, 112.92598100030102, 382.86031071975736], 
"eval_len": [96, 75, 26, 54, 22, 34, 41, 71, 22, 71]}

 88%|████████▊ | 879998/1000000 [11:17:05<1:09:37, 28.73it/s]global step 880000, trans_decision ep_re 312.60346823092567

{"global_step": 880000, "eval_re": [345.85674932901446, 332.72320263395835, 
502.45202534243714, 337.4885647264059, 331.1293730107421, 573.6067951062741, 
112.70186422447637, 189.38223585941546, 111.80370051954564, 288.8901715569871], 
"eval_len": [62, 61, 89, 63, 62, 108, 22, 36, 22, 53]}

 89%|████████▉ | 889997/1000000 [11:25:00<1:04:12, 28.55it/s]global step 890000, trans_decision ep_re 342.6437269676289

{"global_step": 890000, "eval_re": [167.17773846574596, 458.14097176555674, 
354.30369426884783, 609.5967071564805, 130.61355773121824, 551.4446280135412, 
113.7790328358117, 363.41210253271504, 171.9564558677463, 506.0123810386257], 
"eval_len": [32, 80, 63, 114, 25, 116, 22, 66, 33, 89]}

 90%|████████▉ | 899997/1000000 [11:32:33<58:18, 28.58it/s]global step 900000, trans_decision ep_re 284.1094071870774

{"global_step": 900000, "eval_re": [278.524193275935, 147.3397871570859, 
119.15178642493339, 185.2017726947484, 293.08275245819186, 117.16863366606084, 
289.49124678840445, 351.69271776179613, 416.2676387637538, 643.1735428798645], 
"eval_len": [50, 28, 23, 35, 56, 23, 56, 62, 84, 117]}

 91%|█████████ | 909998/1000000 [11:40:30<52:21, 28.65it/s]global step 910000, trans_decision ep_re 292.91969671873045

{"global_step": 910000, "eval_re": [407.4647321792696, 406.8932955802766, 
290.0005775683545, 393.7377430347675, 101.6197742090119, 90.53510337954614, 
446.02019967121544, 346.3164298651294, 344.8014602592046, 101.80765144052833], 
"eval_len": [75, 75, 53, 72, 20, 18, 80, 63, 67, 20]}

 92%|█████████▏| 919997/1000000 [11:48:04<46:48, 28.49it/s]global step 920000, trans_decision ep_re 250.72222219783285

{"global_step": 920000, "eval_re": [102.40793577841835, 96.83576626065874, 
276.9342842194437, 178.98969777239057, 474.2021916856479, 303.230211939096, 
474.17273415335734, 356.9492642161082, 108.79881909443112, 134.70131685877624], 
"eval_len": [20, 19, 51, 34, 91, 57, 82, 64, 21, 26]}

 93%|█████████▎| 929999/1000000 [11:56:00<41:01, 28.44it/s]global step 930000, trans_decision ep_re 194.26883959412504

{"global_step": 930000, "eval_re": [124.94956600153408, 273.4435929565986, 
108.5485350796835, 107.7347905091842, 317.2245170219927, 323.9986260631786, 
103.01032401998843, 328.3774526522121, 90.45805897634645, 164.9429326605318], 
"eval_len": [24, 51, 21, 21, 57, 57, 20, 59, 18, 32]}

 94%|█████████▍| 939997/1000000 [12:03:35<35:30, 28.16it/s]global step 940000, trans_decision ep_re 325.43907879309205

{"global_step": 940000, "eval_re": [90.88190906365999, 112.35533089185566, 
877.9973568208251, 430.65882729161063, 320.60282847695703, 96.46953372038891, 
342.2457323985388, 416.0426401966986, 144.4289684712498, 422.70766059913615], 
"eval_len": [18, 22, 162, 74, 59, 19, 62, 77, 28, 76]}

 95%|█████████▍| 949997/1000000 [12:11:30<29:25, 28.32it/s]global step 950000, trans_decision ep_re 323.1961507281512

{"global_step": 950000, "eval_re": [108.2399418489792, 300.0413688082725, 
371.1421034565041, 320.7098891731934, 304.5259806830272, 462.0779763101423, 
388.28957792950945, 470.6728605091398, 124.52934713862398, 381.73246142412034], 
"eval_len": [21, 56, 66, 60, 58, 81, 69, 85, 24, 66]}

 96%|█████████▌| 959998/1000000 [12:19:04<23:06, 28.84it/s]global step 960000, trans_decision ep_re 254.14321371698466

{"global_step": 960000, "eval_re": [123.81483809276854, 472.9215196901456, 
107.65427630595777, 445.22395494695974, 327.11707479263043, 113.32153170026025, 
309.64675221723775, 146.78422453362558, 364.67352357344936, 130.2744413168115], 
"eval_len": [24, 83, 21, 77, 57, 22, 58, 28, 67, 25]}

 97%|█████████▋| 969997/1000000 [12:27:00<17:30, 28.56it/s]global step 970000, trans_decision ep_re 306.97886962174385

{"global_step": 970000, "eval_re": [436.9596603248682, 264.5614996983519, 
476.9583387764499, 125.30703154928082, 118.45107462558606, 512.8828144473448, 
133.45888175833085, 419.2078116082635, 108.4579260503738, 473.5436573785888], 
"eval_len": [78, 50, 88, 24, 23, 94, 26, 73, 21, 86]}

 98%|█████████▊| 979999/1000000 [12:34:33<11:43, 28.43it/s]global step 980000, trans_decision ep_re 285.27212221652167

{"global_step": 980000, "eval_re": [101.85147952092102, 403.48315724917444, 
118.30243675242731, 479.3862522106741, 147.3730172556352, 140.92513734933814, 
113.50626391273593, 430.49455390233055, 436.87559437303787, 480.52332963894196],
"eval_len": [20, 70, 23, 93, 28, 27, 22, 80, 77, 92]}

 99%|█████████▉| 989998/1000000 [12:42:40<05:56, 28.08it/s]global step 990000, trans_decision ep_re 292.8434459714389

{"global_step": 990000, "eval_re": [152.99900989662308, 118.61036554436237, 
456.0717034396135, 352.5766154713601, 90.60155879597322, 106.19146926596768, 
135.85330284878668, 385.1186458751905, 702.9660423165114, 427.44574626], 
"eval_len": [29, 23, 83, 64, 18, 21, 26, 74, 136, 73]}

100%|█████████▉| 999997/1000000 [12:50:26<00:00, 27.31it/s]global step 1000000, trans_decision ep_re 358.0613377252882

{"global_step": 1000000, "eval_re": [413.83624645820674, 134.46031501299535, 
108.4223001402996, 415.87427956591466, 421.77956049395357, 107.96680724112164, 
443.3803387334935, 130.62609566651125, 894.0994861259778, 510.1679478144078], 
"eval_len": [73, 26, 21, 76, 77, 21, 79, 25, 160, 88]}

100%|██████████| 1000000/1000000 [12:50:39<00:00, 21.63it/s]
