
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.25
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [05:40<13:02:27, 21.09it/s]global step 10000, trans_decision ep_re -234.48199261044152

{"global_step": 10000, "eval_re": [-258.2787187393853, -262.2370556523358, 
-234.5468260753084, -198.17312150265676, -204.53839093044908, 
-224.11481584539794, -173.52242182861136, -274.52403045433425, 
-266.12768332182446, -248.75686175411178], "eval_len": [1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19998/1000000 [16:50<12:48:48, 21.25it/s]global step 20000, trans_decision ep_re -94.11597140270183

{"global_step": 20000, "eval_re": [-128.52061066071235, 25.669702143608575, 
-399.1180755284957, 47.64755979513239, -103.31464472296888, -10.316938363363123,
-69.76310499060432, -73.02580467056175, -46.28386222623817, 
-184.13393480281485], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

  3%|▎         | 29999/1000000 [28:10<12:53:21, 20.90it/s]global step 30000, trans_decision ep_re 5.25962915882169

{"global_step": 30000, "eval_re": [114.21853921831111, -60.89864687610444, 
-108.42505450428322, 25.077493460477296, 38.53827297848149, 309.16105352884085, 
-123.51304656446622, -39.49036726608755, -189.4597982873162, 87.3878459003638], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39999/1000000 [39:20<12:45:48, 20.89it/s]global step 40000, trans_decision ep_re 264.30471524906574

{"global_step": 40000, "eval_re": [31.5470693318984, -258.78644372386367, 
446.1211728652486, 348.0553695034945, 408.9233599153008, 151.34801324242835, 
392.0296741561177, 342.8559391500782, 506.65027448255466, 274.30272356739994], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49998/1000000 [50:30<12:31:55, 21.06it/s]global step 50000, trans_decision ep_re 177.62211281083233

{"global_step": 50000, "eval_re": [454.6854054695124, -42.67258921676161, 
264.1525503640876, 398.1301661612673, 22.510439144392237, 19.67973405242964, 
472.01365745706687, -61.46719213572684, 323.25528942037664, -74.06633260832068],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59999/1000000 [1:01:50<12:27:52, 20.95it/s]global step 60000, trans_decision ep_re 173.6687538722948

{"global_step": 60000, "eval_re": [427.64406832980455, -162.87694014082365, 
423.4984836288769, -133.80261537778713, 504.7548553305024, 422.92011440732983, 
180.6464125798763, -368.9780118825752, 15.87467292250391, 427.0064989252398], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69999/1000000 [1:13:00<12:23:56, 20.84it/s]global step 70000, trans_decision ep_re 202.88456235788163

{"global_step": 70000, "eval_re": [168.34338483824922, 573.1877671140575, 
249.6778613417858, -180.86796698498006, 424.6939618810997, 354.8706553318549, 
242.92372976385755, -282.5120670744616, 105.66378174153557, 372.8645156258179], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79999/1000000 [1:24:20<12:05:15, 21.14it/s]global step 80000, trans_decision ep_re 176.63957674652016

{"global_step": 80000, "eval_re": [-40.29194284372124, 253.6668350795219, 
172.15132902866895, 680.0797347382678, 66.32311781020485, 467.51183208192003, 
82.49485354739264, -291.722577950139, 307.78832505304194, 68.39426092004396], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89999/1000000 [1:35:30<12:06:33, 20.87it/s]global step 90000, trans_decision ep_re 183.76861604077445

{"global_step": 90000, "eval_re": [567.9759156111605, 212.06543834503398, 
429.1032644539977, -139.21009746954635, 372.6426422592817, 550.2214761368655, 
125.1758332850052, 174.1327258005007, -229.26027229258656, -225.1607657219678], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99999/1000000 [1:46:50<12:05:50, 20.67it/s]global step 100000, trans_decision ep_re 207.70585211646465

{"global_step": 100000, "eval_re": [140.2488869003686, 245.5862766198033, 
220.2537753915826, 327.7946845883683, 279.5141110140343, 602.0195294303903, 
-170.30605622804333, 195.7348280788877, 63.06027449197217, 173.15221087728256], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109998/1000000 [1:58:00<11:38:43, 21.23it/s]global step 110000, trans_decision ep_re 174.65613076882732

{"global_step": 110000, "eval_re": [147.23675718056904, 221.4664723975616, 
-348.69084779874424, 547.700571894412, 224.7346198736542, 106.50371323058191, 
181.06894928333173, 273.49025311550355, 240.433046732677, 152.6177717787265], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119999/1000000 [2:09:20<11:39:43, 20.96it/s]global step 120000, trans_decision ep_re 274.4443784637064

{"global_step": 120000, "eval_re": [481.2213896603041, 176.64707553107004, 
26.199202755023492, 648.1434373861057, -95.61065260873993, 477.46280686067774, 
512.4219078483807, 112.27763173519986, 384.796596796083, 20.88438867295904], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129998/1000000 [2:20:30<11:23:05, 21.23it/s]global step 130000, trans_decision ep_re 199.091620330743

{"global_step": 130000, "eval_re": [436.29999465798755, 268.8688011354706, 
506.7364446373082, 84.20523351961825, -224.84860243216397, 185.69153088398812, 
666.7059884413803, -239.94758619055858, 146.74069058086903, 160.4637080735306], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139999/1000000 [2:31:40<11:24:16, 20.95it/s]global step 140000, trans_decision ep_re 299.5863988844802

{"global_step": 140000, "eval_re": [278.231490872375, -61.865513044928605, 
295.9886383597223, 475.8792832610537, 355.9346185763814, 595.4257792491029, 
5.0632508533124705, 243.9845262313691, 247.2826487203586, 559.939265766055], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149999/1000000 [2:43:00<11:18:56, 20.87it/s]global step 150000, trans_decision ep_re 180.90944276880117

{"global_step": 150000, "eval_re": [244.35511583516717, 47.26459791238383, 
114.83504521220947, 72.46187111586971, -40.54092274894809, 217.64879471098348, 
146.625712818471, 212.29618610352335, 237.75827098883119, 556.3897557395209], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159998/1000000 [2:54:10<11:06:18, 21.01it/s]global step 160000, trans_decision ep_re 343.5365551259481

{"global_step": 160000, "eval_re": [523.1460833106556, 99.73737728387545, 
246.48617138223722, 402.8694578611213, 400.6089604774702, 326.733433659591, 
551.3975003673893, 305.0397814123244, 620.7096994681223, -41.36291396330598], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169999/1000000 [3:05:20<11:02:32, 20.88it/s]global step 170000, trans_decision ep_re 381.15183115765274

{"global_step": 170000, "eval_re": [146.51080307971662, 569.6556057022548, 
327.14610419889897, 604.3331243274661, 661.5337209908207, 417.52524972012435, 
437.96741373435214, 129.47565605298857, 77.33693151731603, 440.0337022525892], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179998/1000000 [3:16:40<10:46:53, 21.13it/s]global step 180000, trans_decision ep_re 503.4619350051199

{"global_step": 180000, "eval_re": [398.229377790417, 371.3506028069943, 
535.7546212970477, 484.36329273869984, 245.53799685464116, 620.3116815871064, 
610.6686068674092, 655.3306142684928, 531.8851783845471, 581.1873774558437], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189999/1000000 [3:27:50<10:40:05, 21.09it/s]global step 190000, trans_decision ep_re 351.53507332651935

{"global_step": 190000, "eval_re": [608.0108651941429, 331.2132881014811, 
301.0420420592179, 303.4287365526676, 454.5107195089355, 650.5832703622136, 
-131.75891356255988, -61.48594861658085, 506.7756085496358, 553.03106511604], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199999/1000000 [3:39:10<10:28:15, 21.22it/s]global step 200000, trans_decision ep_re 658.3432613664991

{"global_step": 200000, "eval_re": [549.188084586174, 575.3522877292793, 
678.6377814015784, 580.4226011865459, 746.2413132273867, 826.0020166528507, 
560.8175217701978, 633.6087383703107, 735.6947645894498, 697.4675041512176], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209999/1000000 [3:50:20<10:27:20, 20.99it/s]global step 210000, trans_decision ep_re 559.1498391605612

{"global_step": 210000, "eval_re": [591.2221768261994, 629.04846065445, 
445.21822835491514, 359.6623809471534, 624.7784615311807, 514.0028807460442, 
615.2386276417777, 547.2619894644691, 725.2817398293583, 539.7834456100644], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219999/1000000 [4:01:30<10:21:58, 20.90it/s]global step 220000, trans_decision ep_re 595.4074574620377

{"global_step": 220000, "eval_re": [698.5362208018796, 106.68778334835612, 
591.9760386911001, 580.8524164944743, 583.7316378036119, 620.3292187984541, 
694.1316992205565, 591.2623894240647, 729.0202359632315, 757.5469340746473], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229999/1000000 [4:12:50<10:14:53, 20.87it/s]global step 230000, trans_decision ep_re 550.819792155082

{"global_step": 230000, "eval_re": [590.1367165352457, 584.4205554147164, 
666.4913221462886, 651.9243543367949, 411.71170505423396, 260.61079891453545, 
549.9378136345524, 591.9087729711189, 499.0102539062991, 702.0456286370353], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239998/1000000 [4:24:00<9:54:47, 21.30it/s]global step 240000, trans_decision ep_re 614.0577843416779

{"global_step": 240000, "eval_re": [624.3793384296068, 626.0410642329585, 
587.8489072477407, 682.1213708960055, 741.1701298190985, 524.4663281247233, 
661.9713206367377, 699.7951301061182, 614.8110683731862, 377.97318555060343], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249998/1000000 [4:35:10<9:40:17, 21.54it/s]global step 250000, trans_decision ep_re 609.8706492720169

{"global_step": 250000, "eval_re": [325.7004513866501, 629.7073464362812, 
716.0757924488365, 343.85836770362056, 961.69568747277, 588.5756797893062, 
552.750469361062, 520.0103969565114, 635.7152676470947, 824.6170335180353], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259999/1000000 [4:46:20<9:43:05, 21.15it/s]global step 260000, trans_decision ep_re 711.7471293471058

{"global_step": 260000, "eval_re": [852.2458665506193, 745.545478075379, 
746.6701964398454, 527.9417132367914, 504.8402686484846, 558.6989558263455, 
623.1325307312061, 820.916225930017, 798.3196918173325, 939.1603662150361], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269999/1000000 [4:57:30<9:34:29, 21.18it/s]global step 270000, trans_decision ep_re 790.8729386220023

{"global_step": 270000, "eval_re": [795.453519916883, 805.0796345742746, 
588.018844301157, 990.2925265469999, 532.332561187588, 801.6959368514451, 
795.6023959688384, 1162.6554942961232, 776.2532567406951, 661.3452158360176], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279999/1000000 [5:08:40<9:29:54, 21.06it/s]global step 280000, trans_decision ep_re 580.3055356164575

{"global_step": 280000, "eval_re": [627.2177234539416, 497.2398088274534, 
539.3958793210182, 256.02569511131946, 504.38773007789206, 618.5650505747712, 
787.1970076511828, 553.6761066787388, 798.5647892687668, 620.7855651994919], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289999/1000000 [5:19:50<9:22:22, 21.04it/s]global step 290000, trans_decision ep_re 623.5870139173851

{"global_step": 290000, "eval_re": [586.3862154142108, 644.0311914009854, 
597.2917361998486, 631.2179971316059, 467.208455616085, 704.5051942639167, 
619.0801569910891, 717.2372246019653, 645.356893531186, 623.555074022958], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299999/1000000 [5:31:00<9:09:10, 21.24it/s]global step 300000, trans_decision ep_re 620.5491729188324

{"global_step": 300000, "eval_re": [909.5716822578823, 270.8812144551769, 
802.7871927965991, 728.2057337169053, 478.78742206870817, 993.7352417032397, 
582.4122689996658, 322.96935390441695, 376.72498019193193, 739.4166390937977], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309999/1000000 [5:42:10<9:03:38, 21.15it/s]global step 310000, trans_decision ep_re 723.0330243690638

{"global_step": 310000, "eval_re": [663.0207058464894, 676.5833706727241, 
601.0921751655516, 973.9840526655835, 609.1301113614707, 1097.8815694520865, 
709.4822965711377, 621.2068293012409, 660.2138562050911, 617.7352764492629], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319999/1000000 [5:53:20<8:56:15, 21.13it/s]global step 320000, trans_decision ep_re 547.6597004307157

{"global_step": 320000, "eval_re": [517.1452049478504, 544.7070340454915, 
684.1147122944169, 531.0306844753301, 743.1978772966728, 493.8900134820607, 
337.8065804235001, 478.44602966041066, 521.2761239198147, 624.9827437616107], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329999/1000000 [6:04:30<8:48:15, 21.14it/s]global step 330000, trans_decision ep_re 645.7691719655079

{"global_step": 330000, "eval_re": [740.3669160644365, 570.3103783971579, 
657.8747499898242, 906.0820613283123, 664.5468476428823, 627.7508460679236, 
559.7769929848589, 613.9586409182568, 631.1583994503918, 485.86588681103535], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339999/1000000 [6:15:40<8:39:28, 21.18it/s]global step 340000, trans_decision ep_re 687.0159222708339

{"global_step": 340000, "eval_re": [819.7870749257448, 641.410136079422, 
1105.6969510445017, 524.2730625786184, 602.3147850540514, 709.1072439812763, 
613.0150773270198, 596.3111116104827, 682.4167249920522, 575.8270551151703], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349999/1000000 [6:26:50<8:33:13, 21.11it/s]global step 350000, trans_decision ep_re 572.9280441891397

{"global_step": 350000, "eval_re": [375.25892512588604, 636.524333409014, 
598.8572550688241, 611.5628683242081, 598.5072751853739, 531.5322874868652, 
680.7450706888619, 633.8513893087772, 485.14837325665684, 577.2926640369287], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359999/1000000 [6:37:50<8:26:29, 21.06it/s]global step 360000, trans_decision ep_re 624.2432584076685

{"global_step": 360000, "eval_re": [712.6558061531302, 560.2400153712496, 
605.2390336820716, 647.0542437937379, 518.2764052365287, 674.0463570203297, 
586.935390991408, 713.8827741168392, 505.0918921321812, 719.0106655792085], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369998/1000000 [6:49:00<8:08:21, 21.50it/s]global step 370000, trans_decision ep_re 542.4338664690483

{"global_step": 370000, "eval_re": [633.1639519754503, 601.7188649587586, 
407.06925538741297, 604.3256228884192, 515.2497980974432, 634.62924724859, 
634.9230955178328, 350.4877566575899, 528.3582663043284, 514.4128056546577], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379999/1000000 [7:00:00<8:07:39, 21.19it/s]global step 380000, trans_decision ep_re 664.4762794777007

{"global_step": 380000, "eval_re": [787.2833485087251, 529.286209527439, 
536.6083854868299, 675.3791334035651, 1251.0658074086725, 406.06902043620175, 
760.255209171847, 506.309340717011, 659.243167958892, 533.2631721578233], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389998/1000000 [7:11:10<7:52:34, 21.51it/s]global step 390000, trans_decision ep_re 680.2983583758512

{"global_step": 390000, "eval_re": [908.4097113849746, 709.2850255773794, 
545.2034051569739, 961.6745268816732, 621.7790202065302, 606.4492927643491, 
603.9316511933649, 665.2920743120944, 571.0859510919461, 609.8729251892267], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [7:22:10<7:47:47, 21.38it/s]global step 400000, trans_decision ep_re 455.49944152749157

{"global_step": 400000, "eval_re": [445.82701275421584, 485.0259425987636, 
271.33875756692385, 687.1760885434344, 521.749088580841, 526.0991810701595, 
193.5446017004917, 494.2671159876803, 662.9128346728787, 267.0537917995273], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409999/1000000 [7:33:20<7:43:39, 21.21it/s]global step 410000, trans_decision ep_re 573.4625357206751

{"global_step": 410000, "eval_re": [765.7410778188156, 487.1606754756675, 
720.1910157533047, 518.037292221781, 525.0952706206788, 485.8272148945432, 
417.26884577534173, 689.3173358522395, 730.7968591357476, 395.18976965863123], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419998/1000000 [7:44:20<7:29:44, 21.49it/s]global step 420000, trans_decision ep_re 634.0247688885096

{"global_step": 420000, "eval_re": [627.2016901886223, 648.6654854204882, 
663.0386765954288, 854.5996541369775, 584.4361126977905, 575.628186487209, 
719.7836211972459, 703.9431020924984, 495.7450486545165, 467.2061114143186], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429999/1000000 [7:55:20<7:28:46, 21.17it/s]global step 430000, trans_decision ep_re 640.1320440456349

{"global_step": 430000, "eval_re": [542.6954634736693, 415.6273300927929, 
934.1728450315597, 599.9439015017188, 652.5778278350753, 730.6958064792138, 
647.7059578213274, 471.22003508131905, 641.9214793021479, 764.759793837525], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439998/1000000 [8:06:30<7:12:33, 21.58it/s]global step 440000, trans_decision ep_re 421.59997233039286

{"global_step": 440000, "eval_re": [277.6344575436185, 343.1741579889784, 
381.98719437875724, 237.1417117408035, 529.9605971765376, 544.7340840790857, 
544.9542095237581, 410.2645326293554, 470.7888947133163, 475.3598835297178], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449999/1000000 [8:17:30<7:07:49, 21.43it/s]global step 450000, trans_decision ep_re 548.7609048542373

{"global_step": 450000, "eval_re": [649.9061103026769, 478.5394085172913, 
499.57192456679616, 598.4769830846708, 339.3731707368933, 552.6640440866385, 
553.3510526138348, 806.1257205286964, 352.7728143901607, 656.8278197147143], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459999/1000000 [8:28:30<7:00:32, 21.40it/s]global step 460000, trans_decision ep_re 607.4767676650563

{"global_step": 460000, "eval_re": [624.0492413406095, 798.3109702215146, 
549.2622530918103, 621.4273413544088, 508.10283113059563, 520.7754086879542, 
522.1370317981551, 534.0843703814946, 729.3478554822997, 667.2703731617204], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469999/1000000 [8:39:40<7:00:32, 21.00it/s]global step 470000, trans_decision ep_re 540.6061722756929

{"global_step": 470000, "eval_re": [487.85092401387357, 504.51097303039813, 
552.0478026653151, 625.2576945186926, 476.7242461160901, 538.6287804683941, 
519.958383706527, 481.46886699385993, 581.8838967379018, 637.7301545058762], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479999/1000000 [8:50:40<6:46:01, 21.35it/s]global step 480000, trans_decision ep_re 527.8174921154587

{"global_step": 480000, "eval_re": [470.4849374836117, 408.056701103577, 
373.06621656314354, 484.69113063712393, 687.3654382295224, 509.308377582711, 
535.5608708358224, 386.4228563360067, 789.6300687099098, 633.5883236731579], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489998/1000000 [9:01:40<6:35:17, 21.50it/s]global step 490000, trans_decision ep_re 564.792066077958

{"global_step": 490000, "eval_re": [606.7923031765982, 637.1618682517974, 
464.68528655207405, 696.3344581251788, 522.6794872522307, 582.2003432889435, 
594.6553624693472, 526.0386029892383, 360.89669277415084, 656.4762559000197], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499999/1000000 [9:12:40<6:23:25, 21.73it/s]global step 500000, trans_decision ep_re 616.1888340886002

{"global_step": 500000, "eval_re": [500.35657219073556, 508.6398460363347, 
706.2307777421279, 502.8436166158012, 575.6681295301566, 633.0765974282036, 
666.9675254457102, 558.2700578213311, 1027.9905843724432, 481.84463370315865], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509999/1000000 [9:23:50<6:24:46, 21.22it/s]global step 510000, trans_decision ep_re 533.398076065838

{"global_step": 510000, "eval_re": [450.4991221623906, 444.19194951019335, 
465.5929367732642, 586.3099437204971, 509.2756498487486, 544.3555112366361, 
506.572571753407, 750.5477807637636, 666.388472478396, 410.2468224110842], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519998/1000000 [9:34:50<6:11:45, 21.52it/s]global step 520000, trans_decision ep_re 407.472335021513

{"global_step": 520000, "eval_re": [514.6465526823232, 297.1250222178928, 
463.9901097670807, 372.60715878656737, 540.4828023211638, 486.67679699408467, 
189.71544678763365, 251.26420201726916, 500.80940692756616, 457.4058517135485], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529997/1000000 [9:45:50<6:05:42, 21.42it/s]global step 530000, trans_decision ep_re 560.4799029782266

{"global_step": 530000, "eval_re": [498.31232999038815, 488.4569742095153, 
607.6393001109712, 608.7410165292755, 489.9876089743625, 512.6866424800858, 
552.558401137907, 485.84375913675956, 847.2720531991127, 513.3009440138884], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539999/1000000 [9:56:50<6:02:09, 21.17it/s]global step 540000, trans_decision ep_re 584.1055878724949

{"global_step": 540000, "eval_re": [706.5851551111838, 467.6929228412649, 
725.1763855360249, 788.638054314107, 575.2536748294359, 555.9628840224035, 
478.4005352214759, 528.0519107473895, 422.98930500438837, 592.3050510972753], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549998/1000000 [10:08:00<5:50:10, 21.42it/s]global step 550000, trans_decision ep_re 604.8660738682416

{"global_step": 550000, "eval_re": [706.4155362653904, 437.7692575442349, 
663.166805024818, 626.6277809375678, 649.2439795295636, 615.9144685622665, 
637.0102208947379, 698.3240021149086, 558.011296271163, 456.1773915377651], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559999/1000000 [10:19:00<5:42:36, 21.40it/s]global step 560000, trans_decision ep_re 578.8399101682353

{"global_step": 560000, "eval_re": [802.9871279183343, 564.5676739952594, 
785.9319186852093, 518.029435351547, 576.3040783395118, 434.71316862334425, 
629.1473974659218, 536.8013922654317, 509.2588468666302, 430.6580621711634], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569999/1000000 [10:30:00<5:34:36, 21.42it/s]global step 570000, trans_decision ep_re 578.7888073103045

{"global_step": 570000, "eval_re": [551.7859964518358, 797.6973456337058, 
471.1447852985741, 469.8617306051479, 517.7523520004627, 609.3618270122961, 
920.381750417272, 489.69119822729226, 390.37057574612106, 569.8405117103375], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579999/1000000 [10:41:10<5:25:56, 21.48it/s]global step 580000, trans_decision ep_re 598.8177170719929

{"global_step": 580000, "eval_re": [481.3135548382128, 472.42586509181837, 
465.4867644310465, 531.1619352074557, 795.5059230648078, 588.665109005075, 
603.2366654475519, 645.845092086797, 837.1356348292825, 567.4006267178798], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589998/1000000 [10:52:10<5:15:27, 21.66it/s]global step 590000, trans_decision ep_re 613.4157098670208

{"global_step": 590000, "eval_re": [633.088874707303, 537.8785849777321, 
705.2309180213897, 626.2286379275109, 605.7951171914318, 665.9915594487941, 
600.4721818644466, 528.3144913373792, 471.9024810853507, 759.2542521088694], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599999/1000000 [11:03:10<5:12:18, 21.35it/s]global step 600000, trans_decision ep_re 488.2063111200611

{"global_step": 600000, "eval_re": [277.82050857183873, 485.3513172348104, 
460.61411312363725, 514.508528416504, 425.6747507496949, 616.8101275527181, 
568.9527395907015, 471.3737386074873, 619.5965216028421, 441.36076575037714], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609999/1000000 [11:14:20<5:07:14, 21.16it/s]global step 610000, trans_decision ep_re 242.75071597512775

{"global_step": 610000, "eval_re": [124.07196294856921, 476.8052341664015, 
590.3015551126932, 160.5054181218692, -95.6923122360265, 194.54022091099583, 
241.55290244521993, 286.03729231300633, 361.62975766641034, 87.7551283021378], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619999/1000000 [11:25:30<4:57:49, 21.27it/s]global step 620000, trans_decision ep_re 497.28650911504053

{"global_step": 620000, "eval_re": [526.0246651521214, 513.731635652355, 
544.7612540286631, 528.346287581844, 490.014541060078, 530.1719509817312, 
341.3225138328946, 441.4810710443546, 509.2176629773023, 547.793508839061], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629998/1000000 [11:36:30<4:46:16, 21.54it/s]global step 630000, trans_decision ep_re 624.2869936593078

{"global_step": 630000, "eval_re": [645.0492905698275, 643.0151872315981, 
384.253319116627, 683.4139140923185, 520.5833223640337, 763.5476197150696, 
494.6267888857375, 708.4075448281172, 639.0638744723678, 760.909075317381], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639999/1000000 [11:47:30<4:45:38, 21.01it/s]global step 640000, trans_decision ep_re 493.1723427101866

{"global_step": 640000, "eval_re": [431.87924217050755, 351.61293808962887, 
355.2791233906009, 583.2129548349798, 517.9866978215101, 483.0887084275428, 
610.7198784709884, 501.07583438751, 524.7937331069085, 572.0743164016881], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649999/1000000 [11:58:40<4:32:17, 21.42it/s]global step 650000, trans_decision ep_re 573.3789850193692

{"global_step": 650000, "eval_re": [653.4288060817335, 382.0269028550559, 
464.65610060736014, 491.0983500850829, 596.9428609485702, 792.0146624448901, 
608.6467780179657, 683.4880777439686, 527.83634974661, 533.6509616624562], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659999/1000000 [12:09:40<4:25:35, 21.34it/s]global step 660000, trans_decision ep_re 576.0696483772415

{"global_step": 660000, "eval_re": [647.3607228180923, 453.0426626077108, 
860.6199993050651, 540.6694415624313, 598.4781704897873, 605.8479322414042, 
542.9797920466615, 484.0605358062311, 502.51569085855436, 525.1215360364765], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669999/1000000 [12:20:40<4:14:55, 21.58it/s]global step 670000, trans_decision ep_re 577.4556864208686

{"global_step": 670000, "eval_re": [727.4371892714863, 691.9688094873543, 
574.3663257764352, 540.1827048884031, 646.3576961375477, 422.8783829454956, 
477.09918856233696, 342.86239926590014, 840.7602063913739, 510.64396148235187], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679999/1000000 [12:31:40<4:06:54, 21.60it/s]global step 680000, trans_decision ep_re 518.1298822674701

{"global_step": 680000, "eval_re": [561.1900055898345, 460.7746154176627, 
467.95669029068387, 531.0740002873408, 559.7282503520601, 577.1992456314204, 
551.860272221829, 572.9552796262944, 535.8793872903658, 362.68107596720904], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689998/1000000 [12:42:40<3:58:04, 21.70it/s]global step 690000, trans_decision ep_re 640.8462078066879

{"global_step": 690000, "eval_re": [649.3684937743466, 675.6224456951579, 
677.7890127925701, 606.2274775315454, 659.8966980980947, 456.28882498319354, 
596.2985831327552, 591.4026923456547, 758.4240849340854, 737.143764779475], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699999/1000000 [12:53:40<3:51:28, 21.60it/s]global step 700000, trans_decision ep_re 602.080464535804

{"global_step": 700000, "eval_re": [718.1352225869662, 495.07987628710396, 
589.387723240348, 570.6357087512356, 702.7657155500999, 818.6934295009369, 
470.27835858354433, 462.04867648624605, 556.2989739855542, 637.480960386005], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709999/1000000 [13:04:30<3:44:33, 21.52it/s]global step 710000, trans_decision ep_re 520.6611182196159

{"global_step": 710000, "eval_re": [522.6960815006476, 757.8693354578186, 
697.4130709736548, 531.1503681399597, 436.86595645486915, 384.56839072000355, 
387.9357805732693, 549.3421158713668, 541.0689642885558, 397.701118216015], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719999/1000000 [13:15:30<3:36:01, 21.60it/s]global step 720000, trans_decision ep_re 435.9237161711684

{"global_step": 720000, "eval_re": [268.3174112419445, 574.0694286087922, 
464.4596061461947, 580.2107354853465, 384.7968729301029, 432.6932185922161, 
347.2642568383394, 357.572512748885, 392.89848808504155, 556.9546310348212], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729998/1000000 [13:26:30<3:25:57, 21.85it/s]global step 730000, trans_decision ep_re 525.5395236116888

{"global_step": 730000, "eval_re": [289.8973765522023, 617.9878015859849, 
562.5907508358408, 704.2690904070121, 540.4435554179821, 577.9569854844075, 
509.3857620738033, 399.78241843929027, 355.8192629312068, 697.262232389157], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739998/1000000 [13:37:30<3:19:10, 21.76it/s]global step 740000, trans_decision ep_re 625.9451394731725

{"global_step": 740000, "eval_re": [709.6535728946594, 732.3977022749465, 
429.9402501504492, 548.3836514369265, 716.237996501725, 602.598881814463, 
552.1220558090872, 509.49815622744217, 462.437107224166, 996.1820203978605], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749999/1000000 [13:48:30<3:13:05, 21.58it/s]global step 750000, trans_decision ep_re 448.4923413270296

{"global_step": 750000, "eval_re": [481.2962284616671, 401.3372431001318, 
286.79259548127953, 484.4661745063895, 542.9160357263781, 504.0954256212298, 
484.44957014483583, 428.7825875446256, 511.39320685886656, 359.39434582489196], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759999/1000000 [13:59:20<3:06:02, 21.50it/s]global step 760000, trans_decision ep_re 623.2582091262061

{"global_step": 760000, "eval_re": [723.902441645856, 407.3445755822172, 
615.674032274757, 595.3900632534184, 779.4394866760922, 526.4517438680565, 
374.7959127375888, 659.4734014239958, 678.779658579509, 871.3307752205699], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769997/1000000 [14:10:20<2:56:41, 21.70it/s]global step 770000, trans_decision ep_re 432.61947874835124

{"global_step": 770000, "eval_re": [-269.04040660092255, 515.2581502215944, 
587.8149701010241, 373.30163548270355, 461.0500137663983, 583.2799176095871, 
634.8822271103354, 522.721554322037, 380.59637917057245, 536.3303463001818], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779999/1000000 [14:21:20<2:49:46, 21.60it/s]global step 780000, trans_decision ep_re 481.1708940801744

{"global_step": 780000, "eval_re": [437.30210569157657, 508.775187564644, 
491.1559756632218, 594.2065486360674, 458.93278352023003, 519.6305508034386, 
476.5665358010982, 505.2193580753381, 300.63532817682625, 519.284566869303], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789999/1000000 [14:32:10<2:42:14, 21.57it/s]global step 790000, trans_decision ep_re 622.6913400422271

{"global_step": 790000, "eval_re": [689.7793078628625, 503.06177867411185, 
709.9130429479563, 563.7632216231473, 689.2269603662613, 766.4495451756582, 
438.0675710288671, 654.2398464494927, 625.8657989622103, 586.5463273317024], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799998/1000000 [14:43:10<2:32:27, 21.87it/s]global step 800000, trans_decision ep_re 512.0997250686547

{"global_step": 800000, "eval_re": [762.8819072892578, 476.89526357568286, 
533.3740166265675, 531.7465204753059, 529.6254303727154, 394.0749424990148, 
450.8396814264997, 483.38406129175684, 434.98645357824415, 523.1889735515026], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809999/1000000 [14:54:10<2:28:05, 21.38it/s]global step 810000, trans_decision ep_re 542.3181923545579

{"global_step": 810000, "eval_re": [620.1621194713542, 445.07982449755764, 
669.8878733627829, 587.4078601008827, 616.613098198206, 676.6845854439641, 
574.3347794737541, 445.2162755779067, 248.36836779413312, 539.4271396250382], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819999/1000000 [15:05:10<2:19:50, 21.45it/s]global step 820000, trans_decision ep_re 442.09789416041406

{"global_step": 820000, "eval_re": [539.3103661377805, 489.0535577549933, 
282.2111035323665, 318.4388321015894, 636.1169744719024, 679.0830158760837, 
313.18935238180285, 97.2972279501797, 492.80924214866224, 573.4692692487799], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829999/1000000 [15:16:00<2:11:01, 21.62it/s]global step 830000, trans_decision ep_re 479.7082256124592

{"global_step": 830000, "eval_re": [660.4382189204135, 605.7369741615561, 
430.55955740642884, 374.09305801272717, 532.5449726645326, 347.10831321167404, 
213.49359996772043, 469.72475245812996, 608.6137579865741, 554.7690513348354], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839999/1000000 [15:27:00<2:04:21, 21.44it/s]global step 840000, trans_decision ep_re 605.9570852431799

{"global_step": 840000, "eval_re": [503.415297783157, 534.0503344441368, 
555.6365396347915, 521.2997954967029, 667.4219399518091, 515.37924570211, 
607.9993415120375, 604.367100568246, 589.7581527738088, 960.2431045649992], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849999/1000000 [15:38:00<1:56:31, 21.45it/s]global step 850000, trans_decision ep_re 557.892764387309

{"global_step": 850000, "eval_re": [578.8630916801126, 636.260845144571, 
635.4643693216406, 604.1961392074646, 490.8357157795539, 511.1744466965893, 
481.73100792623615, 574.2484884567873, 457.2075838772865, 608.9459557828476], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859997/1000000 [15:49:00<1:48:16, 21.55it/s]global step 860000, trans_decision ep_re 558.6251007040465

{"global_step": 860000, "eval_re": [652.0798672099439, 560.6104157163436, 
612.2365729558604, 422.42245312510056, 491.7077255910548, 783.3646244584237, 
506.0235466415602, 580.4058197343552, 501.52005473515266, 475.8799268726702], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869998/1000000 [16:00:00<1:39:41, 21.73it/s]global step 870000, trans_decision ep_re 500.5955279877241

{"global_step": 870000, "eval_re": [533.5706375636607, 449.3495056111089, 
551.794622793757, 639.1723193726463, 399.75876952888115, 475.73434850494294, 
393.33262848364143, 406.17634019305626, 557.0387688182578, 600.0273390072892], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879999/1000000 [16:11:00<1:32:36, 21.60it/s]global step 880000, trans_decision ep_re 601.4760057921441

{"global_step": 880000, "eval_re": [667.1188375262001, 481.2673886898879, 
491.2660131241515, 739.9217064578629, 581.18735498064, 514.5239958006407, 
576.5617180781163, 517.2426123444652, 613.5917566316241, 832.0786742878521], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889999/1000000 [16:22:01<1:25:09, 21.53it/s]global step 890000, trans_decision ep_re 547.3227501078956

{"global_step": 890000, "eval_re": [582.4526642860312, 605.0212961143683, 
586.8041030540462, 552.9115725366674, 500.7700977823366, 565.7011626524347, 
306.5210122949805, 545.5551679263712, 560.3600077166543, 667.1304167150655], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [16:33:01<1:17:35, 21.48it/s]global step 900000, trans_decision ep_re 451.91048821802553

{"global_step": 900000, "eval_re": [404.3574701653707, 246.68328729384788, 
449.0801174138999, 538.7147129519111, 532.8264227374664, 574.4570540007944, 
583.9819083994389, 133.253197992584, 508.59402406278986, 547.156687162152], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909999/1000000 [16:43:51<1:09:38, 21.54it/s]global step 910000, trans_decision ep_re 558.9261563994612

{"global_step": 910000, "eval_re": [525.7402867425699, 582.7359342709941, 
478.46446828423524, 525.5363792266319, 599.3821099212835, 672.634684199831, 
603.0635220688839, 643.8102430964203, 654.740299349443, 303.1536368343193], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919998/1000000 [16:54:51<1:01:04, 21.83it/s]global step 920000, trans_decision ep_re 530.2132744265884

{"global_step": 920000, "eval_re": [709.0512091273365, 523.1115266677415, 
353.36507358313776, 552.2391065510116, 747.5167407546469, 383.1383742547685, 
422.17519656472825, 590.602710743817, 482.60850839773553, 538.3242976209615], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929999/1000000 [17:05:41<54:28, 21.41it/s]global step 930000, trans_decision ep_re 527.3898264683343

{"global_step": 930000, "eval_re": [612.3134879099082, 504.56932373063523, 
511.1931451536316, 581.4189400152127, 410.2293430760425, 535.5163144396075, 
413.31566615732993, 450.4710389953971, 623.9530951563027, 630.9179100492753], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939999/1000000 [17:16:41<46:09, 21.66it/s]global step 940000, trans_decision ep_re 604.6562488306246

{"global_step": 940000, "eval_re": [752.051924257777, 571.4218940113243, 
641.7944655424157, 589.0828216514965, 561.1105148033369, 711.4561405355523, 
458.8464290352331, 555.9756247439021, 631.9588209314704, 572.8638527937367], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949997/1000000 [17:27:41<38:48, 21.48it/s]global step 950000, trans_decision ep_re 514.2322131116655

{"global_step": 950000, "eval_re": [516.9996070638697, 185.80282476956066, 
618.1582936853064, 591.3654772968274, 527.1293814021594, 508.5970643040346, 
559.6192898722043, 589.4012933159423, 536.0938578317556, 509.15504157499436], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959998/1000000 [17:38:41<30:34, 21.80it/s]global step 960000, trans_decision ep_re 511.7571732887395

{"global_step": 960000, "eval_re": [447.9446869153733, 468.1901122680451, 
443.46855893101235, 586.0928622079999, 523.9704093452065, 530.6108509017535, 
499.08697439175023, 534.9212034288761, 489.22456786227013, 594.0615066351085], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969998/1000000 [17:49:41<22:57, 21.77it/s]global step 970000, trans_decision ep_re 532.0471174584745

{"global_step": 970000, "eval_re": [507.9796275557286, 587.7685996437589, 
427.104719728668, 556.5354054918594, 551.7042331997638, 631.6766834108994, 
490.5381910393342, 584.6317882888876, 297.6121829681327, 684.9197432577129], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979999/1000000 [18:00:41<15:27, 21.56it/s]global step 980000, trans_decision ep_re 462.46643158750186

{"global_step": 980000, "eval_re": [674.2708251626103, 567.2466458871936, 
441.2306550312437, 430.28238695861654, 383.87181435607476, 533.6319776060694, 
459.22304187423487, 542.2573933552707, -27.38199869242603, 620.03157433613], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989999/1000000 [18:11:31<07:41, 21.66it/s]global step 990000, trans_decision ep_re 546.3631195064412

{"global_step": 990000, "eval_re": [520.0461520451812, 691.2875751841517, 
595.5044803139698, 608.382445490473, 550.3164390724945, 553.7216009832246, 
305.6220913310655, 551.7767684736457, 553.0763197790529, 533.8973223911522], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999999/1000000 [18:22:31<00:00, 21.59it/s]global step 1000000, trans_decision ep_re 523.4880905551479

{"global_step": 1000000, "eval_re": [605.803240408411, 608.1036420470846, 
489.72617046423943, 673.7582594235704, 482.72336381688496, 662.0023928052498, 
522.3682672751809, 499.8320761185081, 348.3166390876876, 342.24685410466225], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [18:23:09<00:00, 15.11it/s]
