
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.2
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9997/1000000 [04:14<10:30:46, 26.16it/s]global step 10000, trans_decision ep_re 60.72530014496309

{"global_step": 10000, "eval_re": [16.581467290318827, 18.62807142066582, 
26.153123024074276, 189.01250371629345, 23.346557953919678, 21.22379873119987, 
72.35960326092996, 121.46077614920604, 59.32924442827707, 59.15785547474594], 
"eval_len": [24, 21, 31, 142, 22, 20, 46, 97, 38, 67]}

  2%|▏         | 19999/1000000 [12:42<10:31:21, 25.87it/s]global step 20000, trans_decision ep_re 14.499082275703262

{"global_step": 20000, "eval_re": [17.985301428689937, 9.50143513373327, 
9.63187036497686, 11.400549175106455, 20.138026648767266, 10.551556686759069, 
11.602142860253984, 23.046021038551743, 18.24977685547028, 12.884142564723751], 
"eval_len": [20, 17, 12, 17, 20, 13, 18, 23, 20, 22]}

  3%|▎         | 29997/1000000 [21:11<10:28:29, 25.72it/s]global step 30000, trans_decision ep_re 37.38698552139839

{"global_step": 30000, "eval_re": [18.085604085044753, 20.57138263975488, 
10.012045607602918, 9.572589624550984, 22.726426870325902, 13.283778806475743, 
10.875965248291855, 132.3508404064596, 102.74892153554191, 33.6423003899353], 
"eval_len": [20, 25, 18, 15, 25, 15, 16, 86, 80, 26]}

  4%|▍         | 39997/1000000 [29:43<10:20:18, 25.79it/s]global step 40000, trans_decision ep_re 72.39823458192933

{"global_step": 40000, "eval_re": [14.873387646980857, 78.73551593170791, 
55.90460274427789, 17.499507906957074, 149.40006547739011, 85.4650826082318, 
19.06485185685718, 149.23546571374274, 74.86904076880707, 78.93482516434062], 
"eval_len": [19, 52, 67, 22, 86, 52, 18, 85, 52, 63]}

  5%|▍         | 49997/1000000 [38:12<10:13:12, 25.82it/s]global step 50000, trans_decision ep_re 21.775352586685084

{"global_step": 50000, "eval_re": [9.773809014013937, 20.514226745375538, 
12.581597438390617, 20.818308972498517, 77.53426327554143, 13.657601001407148, 
16.8288311387278, 10.196660793105634, 14.201626699998263, 21.64660078779199], 
"eval_len": [18, 25, 22, 25, 54, 20, 21, 13, 22, 24]}

  6%|▌         | 59998/1000000 [46:41<10:02:13, 26.01it/s]global step 60000, trans_decision ep_re 20.803739478284893

{"global_step": 60000, "eval_re": [12.906545691620378, 12.790374664525592, 
17.49860282977638, 12.536843369429501, 20.84695253261039, 74.60238789943998, 
12.124956559369984, 23.228824647635648, 8.319311805098163, 13.182594783342926], 
"eval_len": [18, 14, 25, 18, 20, 58, 15, 25, 16, 18]}

  7%|▋         | 69999/1000000 [55:10<9:55:32, 26.03it/s]global step 70000, trans_decision ep_re 27.671322680435207

{"global_step": 70000, "eval_re": [20.071374316779906, 20.185210690177456, 
117.80721793784706, 16.653789424761783, 12.634378987709605, 14.92290256816644, 
34.87136514432738, 11.85657375285792, 11.390627593698381, 16.319786388026145], 
"eval_len": [26, 23, 77, 21, 27, 20, 39, 13, 25, 22]}

  8%|▊         | 79999/1000000 [1:03:50<10:02:48, 25.44it/s]global step 80000, trans_decision ep_re 42.62177499819572

{"global_step": 80000, "eval_re": [20.426856179419172, 8.544667117027291, 
90.52227553717435, 119.41018334393593, 13.690527766982473, 9.168248986131026, 
18.441507664221966, 111.85600184767311, 18.990074275210503, 15.167407264181334],
"eval_len": [21, 12, 50, 79, 24, 13, 28, 75, 23, 20]}

  9%|▉         | 89999/1000000 [1:12:20<9:42:24, 26.04it/s]global step 90000, trans_decision ep_re 27.92225043373825

{"global_step": 90000, "eval_re": [147.03718338437872, 12.333324554578882, 
11.895643065793994, 9.857973791550627, 14.420709688393947, 13.642902330288587, 
18.07135517126439, 17.12914079710925, 17.260263735695887, 17.57400781832817], 
"eval_len": [85, 16, 20, 20, 20, 19, 27, 23, 23, 27]}

 10%|▉         | 99999/1000000 [1:20:50<9:38:10, 25.94it/s]global step 100000, trans_decision ep_re 42.17516326295974

{"global_step": 100000, "eval_re": [28.21392730592482, 73.49003982321602, 
116.68882408888011, 12.891837448674018, 99.40382679994794, 11.603117687517678, 
21.89011109206052, 25.772495470594954, 17.028210257801184, 14.76924265498019], 
"eval_len": [27, 59, 67, 18, 54, 13, 23, 33, 23, 19]}

 11%|█         | 109999/1000000 [1:29:20<9:31:01, 25.98it/s]global step 110000, trans_decision ep_re 41.958487056798845

{"global_step": 110000, "eval_re": [13.741837344945626, 23.529919761272776, 
167.86090602179004, 9.485731805440256, 12.95240617176741, 90.57592878582692, 
31.144920570420762, 24.014638436594165, 12.430239438610826, 33.84834223131963], 
"eval_len": [17, 23, 98, 16, 15, 68, 29, 23, 15, 33]}

 12%|█▏        | 119998/1000000 [1:37:35<9:18:14, 26.27it/s]global step 120000, trans_decision ep_re 16.985370389264236

{"global_step": 120000, "eval_re": [12.851947874091232, 9.48376746464897, 
13.570953038450295, 20.557036402708075, 19.820757752506985, 6.835537960330053, 
28.30496218575845, 20.08168040565942, 17.06263375861346, 21.284427049875422], 
"eval_len": [19, 13, 20, 24, 24, 12, 26, 44, 22, 23]}

 13%|█▎        | 129997/1000000 [1:46:04<9:23:10, 25.75it/s]global step 130000, trans_decision ep_re 30.148463995245585

{"global_step": 130000, "eval_re": [11.010883174849615, 12.053947189238531, 
122.25405461941267, 9.931495368603871, 11.939262953975694, 85.30506896655793, 
14.870488386510653, 12.045549435988788, 9.25970624463942, 12.814183612678653], 
"eval_len": [16, 17, 81, 20, 25, 55, 22, 17, 17, 16]}

 14%|█▍        | 139997/1000000 [1:54:50<9:16:43, 25.75it/s]global step 140000, trans_decision ep_re 42.257153893452895

{"global_step": 140000, "eval_re": [127.76900392218299, 23.96018261561462, 
120.41018024439373, 21.021888688414705, 20.004367652022577, 63.724795902240416, 
13.29407267871706, 7.601394978580169, 13.331751887552787, 11.453900364809913], 
"eval_len": [87, 23, 82, 21, 24, 72, 17, 13, 15, 20]}

 15%|█▍        | 149997/1000000 [2:03:20<9:10:57, 25.71it/s]global step 150000, trans_decision ep_re 50.011617656543116

{"global_step": 150000, "eval_re": [154.42141703830782, 177.71761092167594, 
21.52505576830443, 43.69240170199107, 12.896679658244567, 11.226361005697376, 
12.91525385856504, 29.558576169528394, 23.043662121560175, 13.119158321556363], 
"eval_len": [77, 104, 20, 87, 24, 13, 15, 34, 27, 17]}

 16%|█▌        | 159998/1000000 [2:11:50<8:51:19, 26.35it/s]global step 160000, trans_decision ep_re 63.270441987312665

{"global_step": 160000, "eval_re": [12.049802083283767, 13.148740277187253, 
153.48986037354783, 44.05836402999665, 12.40184076783918, 90.53349305372248, 
136.22650687256032, 40.97178481296369, 18.70166945249936, 111.12235814952618], 
"eval_len": [14, 15, 77, 40, 20, 69, 86, 39, 27, 68]}

 17%|█▋        | 169998/1000000 [2:20:05<8:41:16, 26.54it/s]global step 170000, trans_decision ep_re 37.871195767259984

{"global_step": 170000, "eval_re": [13.11672283350045, 9.815241683851712, 
15.42901639198564, 12.328964759318135, 23.527805888996234, 7.64673205133598, 
17.680267451552663, 33.72452783215098, 116.02469919916807, 129.41797958074], 
"eval_len": [20, 14, 18, 17, 23, 10, 18, 29, 86, 85]}

 18%|█▊        | 179997/1000000 [2:28:35<8:47:05, 25.93it/s]global step 180000, trans_decision ep_re 58.243581493715155

{"global_step": 180000, "eval_re": [97.23681439700574, 276.1194907039862, 
15.011862610527071, 20.192825438810615, 10.380671652974407, 16.843934749649627, 
12.737115369071407, 19.105915045121368, 101.60553723543683, 13.20164773456842], 
"eval_len": [57, 170, 20, 19, 12, 20, 18, 23, 55, 18]}

 19%|█▉        | 189999/1000000 [2:37:20<8:44:13, 25.75it/s]global step 190000, trans_decision ep_re 60.8406795569319

{"global_step": 190000, "eval_re": [128.63122836364477, 18.286454286681906, 
16.26574640718073, 9.6098616416996, 102.47263283985353, 11.671077410950089, 
8.347401761096828, 15.961907229035203, 24.398922126405157, 272.76156350277114], 
"eval_len": [89, 25, 16, 11, 68, 14, 10, 21, 24, 119]}

 20%|█▉        | 199999/1000000 [2:45:50<8:38:31, 25.71it/s]global step 200000, trans_decision ep_re 53.408030256925336

{"global_step": 200000, "eval_re": [94.6054801531258, 34.834629710404386, 
68.17389396163345, 13.982049145850834, 15.179838748245654, 15.754425146006444, 
27.700796764015838, 22.75098993607393, 82.31374884719565, 158.78445015670135], 
"eval_len": [69, 43, 44, 16, 18, 16, 28, 29, 69, 82]}

 21%|██        | 209999/1000000 [2:54:20<8:28:33, 25.89it/s]global step 210000, trans_decision ep_re 38.4122068951949

{"global_step": 210000, "eval_re": [13.980917481827914, 27.08513331828904, 
24.110757150120246, 10.642717903662462, 17.378162909639947, 78.31630810895173, 
17.42207439259328, 63.4266902351362, 115.06173757405753, 16.697569877670666], 
"eval_len": [27, 24, 55, 13, 21, 61, 19, 42, 82, 20]}

 22%|██▏       | 219997/1000000 [3:02:36<8:25:24, 25.72it/s]global step 220000, trans_decision ep_re 22.917399533716953

{"global_step": 220000, "eval_re": [23.83194579239825, 22.646325569037632, 
21.236154801585826, 12.177833881783478, 23.825994759876423, 13.394321782466578, 
28.390465143335877, 11.225492597680317, 9.606122015257245, 62.8393389937479], 
"eval_len": [27, 30, 21, 15, 25, 21, 27, 16, 34, 60]}

 23%|██▎       | 229997/1000000 [3:11:03<8:21:16, 25.60it/s]global step 230000, trans_decision ep_re 36.347529618760134

{"global_step": 230000, "eval_re": [38.16878916656145, 85.03781562339282, 
80.01939278477556, 15.710226529120467, 16.776678336219064, 11.016168969883578, 
20.63469632537638, 15.082946121846513, 64.86337634063752, 16.165205989787992], 
"eval_len": [48, 48, 53, 20, 19, 17, 35, 22, 53, 16]}

 24%|██▍       | 239997/1000000 [3:19:35<8:06:48, 26.02it/s]global step 240000, trans_decision ep_re 38.76817526690498

{"global_step": 240000, "eval_re": [13.918983292755641, 23.215536424044245, 
17.26364616284785, 79.08872119374094, 10.938737660525833, 118.37128025756842, 
16.483845740389782, 15.479457665140815, 74.83218054818794, 18.089363723848383], 
"eval_len": [21, 26, 18, 66, 16, 81, 31, 16, 47, 23]}

 25%|██▍       | 249997/1000000 [3:28:04<8:04:49, 25.78it/s]global step 250000, trans_decision ep_re 45.91225488722851

{"global_step": 250000, "eval_re": [24.443814583597344, 113.97869670735633, 
12.441146053580828, 11.083798043866285, 93.53658767420552, 68.24842175930911, 
21.72924601491227, 13.537380610224988, 90.53906207148054, 9.58439535375181], 
"eval_len": [23, 74, 17, 21, 69, 52, 29, 23, 51, 15]}

 26%|██▌       | 259997/1000000 [3:36:34<7:55:29, 25.94it/s]global step 260000, trans_decision ep_re 29.92089153459454

{"global_step": 260000, "eval_re": [14.947657680266769, 31.344610377815354, 
82.42434065716522, 13.702147096546083, 14.240443681725226, 20.195084315172103, 
14.321041427867312, 70.77241164637464, 24.41834504671301, 12.842833416299687], 
"eval_len": [21, 25, 101, 19, 23, 31, 22, 74, 27, 28]}

 27%|██▋       | 269997/1000000 [3:45:05<7:51:57, 25.78it/s]global step 270000, trans_decision ep_re 59.767156508099085

{"global_step": 270000, "eval_re": [14.432001889028694, 106.46412446977864, 
11.161453079776464, 28.968649149811036, 110.0080037313124, 106.78748749096258, 
100.00149266946632, 11.798471681869481, 14.88763108280135, 93.16224983618378], 
"eval_len": [16, 77, 26, 27, 76, 68, 95, 20, 19, 63]}

 28%|██▊       | 279997/1000000 [3:53:35<7:42:12, 25.96it/s]global step 280000, trans_decision ep_re 45.690018941376714

{"global_step": 280000, "eval_re": [24.11008964887317, 86.36285678995874, 
27.467256307046778, 116.01006770158736, 23.409447207995967, 14.820390653548763, 
64.11293234889048, 75.5985177518842, 13.08618029150342, 11.922450712478213], 
"eval_len": [28, 62, 25, 84, 22, 20, 60, 82, 21, 18]}

 29%|██▉       | 289997/1000000 [4:02:04<7:37:25, 25.87it/s]global step 290000, trans_decision ep_re 43.87564463266038

{"global_step": 290000, "eval_re": [10.606575185840144, 94.45795994055288, 
129.89716112706867, 111.43137962229184, 20.343441006031824, 12.697897176484453, 
13.587706639796728, 16.881397995444807, 13.468507267407766, 15.384420365684752],
"eval_len": [15, 56, 77, 69, 28, 21, 17, 27, 18, 22]}

 30%|██▉       | 299999/1000000 [4:10:33<7:28:19, 26.02it/s]global step 300000, trans_decision ep_re 48.1813301548379

{"global_step": 300000, "eval_re": [23.43856367123937, 76.03570107441887, 
100.25620492328771, 7.0244049861814934, 14.724964678182245, 143.26081226981682, 
10.205113681835815, 22.808207937833043, 69.91541922979093, 14.143909095792722], 
"eval_len": [71, 51, 75, 9, 25, 107, 19, 23, 50, 22]}

 31%|███       | 309999/1000000 [4:19:02<7:25:08, 25.83it/s]global step 310000, trans_decision ep_re 31.38057959425824

{"global_step": 310000, "eval_re": [14.2052348837185, 99.25532107796951, 
9.312101121320064, 11.079067893863394, 10.210792262718956, 9.567859142399564, 
102.35115610871883, 20.2466844760215, 26.297411889933684, 11.280167085918352], 
"eval_len": [27, 57, 15, 21, 18, 16, 56, 19, 23, 16]}

 32%|███▏      | 319998/1000000 [4:27:31<7:12:42, 26.19it/s]global step 320000, trans_decision ep_re 55.098719841721696

{"global_step": 320000, "eval_re": [17.41397459431124, 130.26537282726977, 
12.40650527621186, 105.5758024140554, 12.242258150346878, 10.750766404436792, 
100.99054564242735, 15.11488735497737, 129.79939586789098, 16.427689885289244], 
"eval_len": [26, 65, 14, 78, 21, 14, 76, 23, 66, 18]}

 33%|███▎      | 329999/1000000 [4:36:10<7:08:37, 26.05it/s]global step 330000, trans_decision ep_re 41.69378837801064

{"global_step": 330000, "eval_re": [13.258839910715157, 105.83933954205355, 
12.487266524740935, 7.840118590654007, 106.5990678934459, 66.06154410368798, 
11.99573101917387, 11.221217263832447, 16.072983033284725, 65.56177589851787], 
"eval_len": [21, 71, 17, 10, 66, 45, 26, 14, 21, 76]}

 34%|███▍      | 339999/1000000 [4:44:26<7:01:38, 26.09it/s]global step 340000, trans_decision ep_re 31.855580280221204

{"global_step": 340000, "eval_re": [16.7407615853738, 12.16913571731592, 
83.46280659856416, 82.01369482521162, 43.83645922545862, 9.572881203112857, 
11.184984690794415, 17.81026945881109, 17.98635390102005, 23.77845559654955], 
"eval_len": [23, 19, 52, 58, 50, 14, 15, 23, 23, 24]}

 35%|███▍      | 349999/1000000 [4:52:52<6:56:12, 26.03it/s]global step 350000, trans_decision ep_re 26.350983323214827

{"global_step": 350000, "eval_re": [15.932181691880189, 17.215949493057497, 
90.26099638173882, 7.522925428945332, 12.487779966186803, 11.047343346973083, 
23.599162335415155, 67.61062154553127, 7.716410784389474, 10.11646225803061], 
"eval_len": [27, 20, 62, 12, 14, 15, 28, 62, 18, 16]}

 36%|███▌      | 359998/1000000 [5:01:30<6:42:38, 26.49it/s]global step 360000, trans_decision ep_re 55.52168748365729

{"global_step": 360000, "eval_re": [16.981921675598905, 19.42034373685009, 
20.158879872387462, 114.0749240047602, 153.7271425095486, 94.47962146611766, 
10.809339807040551, 13.4777721593919, 101.124591737239, 10.962337867638414], 
"eval_len": [20, 23, 19, 78, 115, 52, 17, 19, 76, 13]}

 37%|███▋      | 369999/1000000 [5:09:43<6:38:26, 26.35it/s]global step 370000, trans_decision ep_re 39.06972766171473

{"global_step": 370000, "eval_re": [15.395191518714867, 16.327611283919254, 
18.42004952105197, 14.845603192683463, 69.9560696907425, 19.12577840276347, 
9.665268572899851, 103.60451826272951, 81.17492089202254, 42.18226527961986], 
"eval_len": [22, 25, 20, 17, 46, 21, 13, 90, 86, 37]}

 38%|███▊      | 379999/1000000 [5:18:20<6:35:14, 26.14it/s]global step 380000, trans_decision ep_re 37.302195678688506

{"global_step": 380000, "eval_re": [96.96014236527876, 24.31370870611148, 
134.93123374260009, 19.544856068251683, 19.601848818662276, 9.306043368989412, 
15.275525114866124, 11.064000439086884, 19.363006552957515, 22.66159161008083], 
"eval_len": [70, 28, 71, 23, 23, 11, 17, 17, 23, 23]}

 39%|███▉      | 389997/1000000 [5:26:33<6:34:40, 25.76it/s]global step 390000, trans_decision ep_re 57.22103322266223

{"global_step": 390000, "eval_re": [60.665891131778025, 20.148001725951705, 
134.5673675005233, 10.711232456987341, 84.16439994298293, 14.94583910499351, 
121.38490411089724, 45.5981092665618, 7.248513549898296, 72.77607343604824], 
"eval_len": [85, 18, 104, 22, 52, 19, 76, 81, 16, 45]}

 40%|███▉      | 399999/1000000 [5:35:10<6:20:15, 26.30it/s]global step 400000, trans_decision ep_re 36.374774327997216

{"global_step": 400000, "eval_re": [23.902012040230826, 17.077105946043115, 
88.12060771994727, 10.944942005138925, 12.821231624239743, 9.861848193185912, 
90.68721984229184, 15.008964568540836, 11.488352676421886, 83.8354586639318], 
"eval_len": [23, 19, 63, 17, 18, 15, 60, 18, 16, 58]}

 41%|████      | 409999/1000000 [5:43:24<6:19:34, 25.91it/s]global step 410000, trans_decision ep_re 43.71476285124376

{"global_step": 410000, "eval_re": [19.755325840969196, 12.791955706607395, 
12.494593716396988, 30.215526131648346, 15.54935618162176, 17.407545048281555, 
144.05499331645964, 10.612997462485259, 110.04264109686632, 64.22269401110115], 
"eval_len": [20, 19, 20, 49, 18, 41, 113, 14, 100, 84]}

 42%|████▏     | 419999/1000000 [5:51:50<6:10:33, 26.09it/s]global step 420000, trans_decision ep_re 34.67031101408435

{"global_step": 420000, "eval_re": [53.98068111379461, 17.02701736903936, 
18.230298664677942, 25.10594164034421, 89.62985456096219, 19.255548382902173, 
13.591643548141693, 74.9573543759128, 20.032952282814673, 14.891818202253823], 
"eval_len": [40, 18, 23, 30, 75, 18, 16, 62, 27, 31]}

 43%|████▎     | 429998/1000000 [6:00:15<5:58:48, 26.48it/s]global step 430000, trans_decision ep_re 53.33330608524925

{"global_step": 430000, "eval_re": [50.02528263057903, 29.67725345814435, 
74.08613835261882, 92.60664969666341, 16.145832025141424, 115.76292848630173, 
14.698743596794806, 94.47023886076063, 30.04851025342342, 15.811483492064943], 
"eval_len": [44, 34, 49, 64, 37, 80, 19, 60, 38, 17]}

 44%|████▍     | 439997/1000000 [6:08:41<5:56:49, 26.16it/s]global step 440000, trans_decision ep_re 49.54777152976176

{"global_step": 440000, "eval_re": [12.635219432778705, 36.58118801296062, 
78.65485355368901, 27.48921787642292, 70.66476426886408, 68.46918127122765, 
81.99565745721003, 36.48237758216541, 72.42946511568951, 10.075790726609657], 
"eval_len": [14, 48, 60, 25, 53, 41, 59, 45, 54, 17]}

 45%|████▍     | 449999/1000000 [6:17:20<5:51:01, 26.11it/s]global step 450000, trans_decision ep_re 31.20138662705034

{"global_step": 450000, "eval_re": [13.846553108631758, 13.460637545564458, 
19.185917322244087, 16.357233600161276, 9.379172677844455, 98.17314735859364, 
20.40383651557097, 94.22015686880286, 14.394266264709557, 12.592945008380278], 
"eval_len": [17, 20, 24, 16, 11, 69, 25, 67, 18, 18]}

 46%|████▌     | 459998/1000000 [6:25:33<5:42:21, 26.29it/s]global step 460000, trans_decision ep_re 69.10337600770637

{"global_step": 460000, "eval_re": [18.44223846977778, 16.31349364632691, 
21.265138986569852, 67.15760694192453, 7.960834158850097, 20.566254248295316, 
10.409630999379072, 153.8246224972315, 96.13664712779443, 278.9572930009143], 
"eval_len": [18, 18, 26, 48, 11, 26, 12, 100, 71, 128]}

 47%|████▋     | 469999/1000000 [6:34:10<5:34:46, 26.39it/s]global step 470000, trans_decision ep_re 45.51351992775087

{"global_step": 470000, "eval_re": [34.579684336671, 85.91304571970029, 
64.30638890499007, 31.896629943446516, 11.878118929266561, 20.353379030045254, 
9.217227198310248, 19.368335276227764, 95.92124978508649, 81.70114015376453], 
"eval_len": [38, 76, 45, 46, 15, 29, 13, 24, 65, 71]}

 48%|████▊     | 479998/1000000 [6:42:23<5:31:44, 26.12it/s]global step 480000, trans_decision ep_re 54.61583003544858

{"global_step": 480000, "eval_re": [115.15457371858399, 68.37601977668422, 
93.54399422881583, 17.264914336316597, 16.723199510248794, 16.22985311714502, 
7.740707036061826, 10.930609749088946, 68.60123344469031, 131.59319543685032], 
"eval_len": [82, 54, 75, 22, 22, 18, 15, 16, 51, 110]}

 49%|████▉     | 489999/1000000 [6:51:00<5:25:55, 26.08it/s]global step 490000, trans_decision ep_re 63.35825655426635

{"global_step": 490000, "eval_re": [97.42553486938922, 17.88922962882588, 
167.40508692995056, 10.305630190742907, 20.06201317382217, 99.56502749050802, 
24.13402296619823, 23.408182290028655, 21.165925164293768, 152.22191283890413], 
"eval_len": [81, 25, 89, 14, 19, 79, 24, 24, 24, 102]}

 50%|████▉     | 499999/1000000 [6:59:15<5:19:32, 26.08it/s]global step 500000, trans_decision ep_re 54.60395126275755

{"global_step": 500000, "eval_re": [12.104005565996024, 52.91710009017978, 
16.54090174735287, 78.3362891901448, 105.94828193174686, 15.851326899927288, 
74.3506983143879, 70.38623374808688, 99.90948958281425, 19.695185556938945], 
"eval_len": [17, 47, 19, 62, 79, 19, 66, 47, 81, 20]}

 51%|█████     | 509998/1000000 [7:07:40<5:05:35, 26.72it/s]global step 510000, trans_decision ep_re 21.890740320352897

{"global_step": 510000, "eval_re": [42.73470217690603, 42.634175879345335, 
15.21480590247046, 14.630333669227587, 10.983481887064224, 12.749342072313114, 
15.472399430564014, 8.420344776843915, 35.50596101814654, 20.56185639064779], 
"eval_len": [49, 38, 18, 21, 17, 24, 18, 13, 34, 22]}

 52%|█████▏    | 519997/1000000 [7:16:05<5:06:33, 26.10it/s]global step 520000, trans_decision ep_re 21.150443621554135

{"global_step": 520000, "eval_re": [73.86284455743584, 21.201416156250176, 
10.632052010812588, 16.899530854408297, 13.165175187110721, 19.005164901067594, 
12.275461624257444, 19.08543632443802, 12.633557781057904, 12.743796818702782], 
"eval_len": [51, 23, 14, 24, 20, 19, 21, 21, 18, 24]}

 53%|█████▎    | 529998/1000000 [7:24:31<4:55:15, 26.53it/s]global step 530000, trans_decision ep_re 48.73471093808281

{"global_step": 530000, "eval_re": [13.612219998193062, 20.783748051805734, 
95.33089797833031, 18.447207942124063, 16.796901683913955, 13.71242662207355, 
127.3263587365783, 157.75303009580807, 9.079057503375957, 14.505260768625051], 
"eval_len": [26, 28, 62, 20, 24, 23, 66, 100, 11, 21]}

 54%|█████▍    | 539997/1000000 [7:32:56<4:53:38, 26.11it/s]global step 540000, trans_decision ep_re 33.08227223893768

{"global_step": 540000, "eval_re": [17.73829736780893, 11.27215279445343, 
38.97320520628098, 10.499686288674752, 68.25833486002922, 14.14504242718044, 
53.96264460172949, 8.83677535961709, 92.17761906285313, 14.958964420749412], 
"eval_len": [18, 13, 44, 12, 44, 20, 46, 12, 70, 21]}

 55%|█████▍    | 549999/1000000 [7:41:21<4:46:56, 26.14it/s]global step 550000, trans_decision ep_re 23.7136855521301

{"global_step": 550000, "eval_re": [9.699888789939177, 12.50558122620989, 
20.499895464223474, 11.78729156136466, 20.41148796192358, 16.835164465171385, 
22.264548966429548, 8.922719581592318, 43.195682625162114, 71.01459487928489], 
"eval_len": [16, 22, 23, 19, 21, 19, 25, 12, 59, 44]}

 56%|█████▌    | 559998/1000000 [7:50:00<4:36:56, 26.48it/s]global step 560000, trans_decision ep_re 96.74651365509904

{"global_step": 560000, "eval_re": [95.79525054269025, 80.26622034341739, 
31.733108305533264, 85.36804784136393, 30.739606545982372, 151.42777226351336, 
107.22288219035035, 79.75952851712663, 77.33311082735577, 227.81960917365703], 
"eval_len": [85, 67, 41, 60, 32, 84, 61, 52, 67, 119]}

 57%|█████▋    | 569997/1000000 [7:58:14<4:33:05, 26.24it/s]global step 570000, trans_decision ep_re 22.19940400825797

{"global_step": 570000, "eval_re": [20.578470425908247, 69.53434967857484, 
13.97849740664229, 9.899010208486956, 21.509730845401382, 28.019899087155483, 
9.721221400228579, 10.35756744811212, 25.72547157520905, 12.669822006860786], 
"eval_len": [25, 61, 18, 13, 25, 35, 16, 24, 30, 23]}

 58%|█████▊    | 579999/1000000 [8:06:50<4:27:06, 26.21it/s]global step 580000, trans_decision ep_re 52.86927504359511

{"global_step": 580000, "eval_re": [27.661577420011195, 10.034131292728967, 
80.62475281409577, 145.49541663515978, 19.699789383478034, 19.529475301981194, 
181.586589482238, 15.75163348980691, 16.2182828547416, 12.091101761709657], 
"eval_len": [40, 15, 54, 104, 23, 21, 111, 17, 18, 19]}

 59%|█████▉    | 589999/1000000 [8:15:05<4:20:31, 26.23it/s]global step 590000, trans_decision ep_re 36.15991835184603

{"global_step": 590000, "eval_re": [17.082038491662296, 73.01168530645582, 
95.6809193921653, 22.08475405032993, 10.499088239690035, 78.49442428475221, 
15.174250074858469, 16.393962934107165, 20.968194624770835, 12.209866119668298],
"eval_len": [19, 57, 54, 22, 18, 68, 18, 26, 20, 16]}

 60%|█████▉    | 599999/1000000 [8:23:40<4:17:33, 25.88it/s]global step 600000, trans_decision ep_re 32.07495435876633

{"global_step": 600000, "eval_re": [14.49780327531258, 15.306525239415095, 
129.10078438145032, 42.68861433926905, 13.35984532805673, 14.491147123128915, 
11.437673974645776, 25.439707982394207, 24.843743339539383, 29.583698604451147],
"eval_len": [22, 25, 92, 39, 16, 18, 17, 29, 35, 39]}

 61%|██████    | 609999/1000000 [8:31:55<4:08:43, 26.13it/s]global step 610000, trans_decision ep_re 45.48933049216074

{"global_step": 610000, "eval_re": [20.97315774567761, 10.012116827491981, 
121.35647669076128, 37.608673302674816, 30.789665866853554, 33.86486912791341, 
9.769501553964378, 19.871180694274596, 64.290551876454, 106.3571112355418], 
"eval_len": [20, 12, 80, 42, 40, 33, 20, 23, 39, 82]}

 62%|██████▏   | 619998/1000000 [8:40:21<3:58:45, 26.53it/s]global step 620000, trans_decision ep_re 42.212686696521935

{"global_step": 620000, "eval_re": [8.423929350789036, 76.5147153991928, 
18.972307735611505, 73.55325578582071, 13.663636393713693, 41.61983995017436, 
8.875816105467063, 33.76048010157616, 91.28219990044997, 55.46068624242407], 
"eval_len": [23, 58, 21, 49, 17, 36, 15, 32, 54, 44]}

 63%|██████▎   | 629997/1000000 [8:48:46<3:55:54, 26.14it/s]global step 630000, trans_decision ep_re 49.88861650164806

{"global_step": 630000, "eval_re": [91.45271413928764, 12.507788110030727, 
15.084819613726653, 164.5618352585676, 126.65619669758313, 35.474487909428596, 
15.734585836257615, 15.485648060828703, 12.164063576518076, 9.764025814251863], 
"eval_len": [69, 20, 18, 89, 84, 50, 19, 16, 19, 16]}

 64%|██████▍   | 639999/1000000 [8:57:12<3:47:38, 26.36it/s]global step 640000, trans_decision ep_re 54.53881439850824

{"global_step": 640000, "eval_re": [107.91127645267314, 12.42000244178272, 
108.23950727567096, 87.12822038930511, 18.037025266467207, 16.417962892297894, 
26.16767292585552, 78.87589005020253, 74.15586046913086, 16.03472582169637], 
"eval_len": [71, 16, 72, 68, 19, 38, 42, 56, 69, 21]}

 65%|██████▍   | 649998/1000000 [9:05:50<3:42:41, 26.19it/s]global step 650000, trans_decision ep_re 27.192296262821667

{"global_step": 650000, "eval_re": [18.69006914645556, 21.917871505798793, 
9.062767094780275, 70.18303821348135, 14.63674092642853, 27.578760452940177, 
62.73137715315572, 16.541718403745413, 13.356822116266, 17.223797615164923], 
"eval_len": [19, 20, 14, 48, 27, 28, 49, 24, 26, 22]}

 66%|██████▌   | 659997/1000000 [9:14:02<3:34:50, 26.38it/s]global step 660000, trans_decision ep_re 47.04127275185424

{"global_step": 660000, "eval_re": [101.20150064777407, 20.087183488852485, 
20.560192190918507, 18.1211012910999, 10.702780281915201, 174.88730457320636, 
14.923731362714205, 9.143729178957809, 75.93425257299843, 24.85095193010542], 
"eval_len": [60, 19, 26, 27, 13, 94, 25, 20, 44, 46]}

 67%|██████▋   | 669999/1000000 [9:22:40<3:34:08, 25.68it/s]global step 670000, trans_decision ep_re 36.39031300109729

{"global_step": 670000, "eval_re": [13.830125224553985, 12.461577706412607, 
78.28715165526131, 22.116973664069697, 24.369251283525724, 93.92639907562733, 
61.578734488179485, 27.082908888434297, 16.4232168253641, 13.826791199544376], 
"eval_len": [17, 16, 67, 27, 45, 68, 53, 24, 17, 21]}

 68%|██████▊   | 679999/1000000 [9:30:53<3:23:56, 26.15it/s]global step 680000, trans_decision ep_re 63.914789011591175

{"global_step": 680000, "eval_re": [16.14063518280732, 14.407949119386785, 
14.140971564675139, 24.163765332855093, 13.630540228699246, 17.548075627282902, 
245.37144356235098, 198.809708693091, 84.04481813899486, 10.889982665768462], 
"eval_len": [20, 18, 20, 42, 16, 25, 123, 116, 53, 14]}

 69%|██████▉   | 689999/1000000 [9:39:30<3:17:55, 26.10it/s]global step 690000, trans_decision ep_re 33.053669558954944

{"global_step": 690000, "eval_re": [64.54384013818971, 15.951098125953358, 
26.596279348641943, 12.16219043930191, 71.45914694355385, 19.70214023186609, 
82.04701990701734, 12.748155809606862, 15.806517389761957, 9.52030725565639], 
"eval_len": [46, 19, 24, 13, 49, 25, 74, 19, 23, 13]}

 70%|██████▉   | 699997/1000000 [9:47:44<3:12:01, 26.04it/s]global step 700000, trans_decision ep_re 32.52359150756769

{"global_step": 700000, "eval_re": [22.57068633648343, 55.835055767871694, 
79.28036680526351, 10.25098929562001, 15.318083377594464, 66.32855574129803, 
20.305331205331065, 26.097412758633318, 17.434643619423895, 11.814790168157417],
"eval_len": [26, 61, 55, 12, 18, 45, 23, 25, 22, 39]}

 71%|███████   | 709999/1000000 [9:56:20<3:04:19, 26.22it/s]global step 710000, trans_decision ep_re 59.5469554353535

{"global_step": 710000, "eval_re": [46.668812430121264, 18.512448604951526, 
13.940878556429473, 11.141198451785412, 34.61430804480828, 22.391909193943793, 
154.67892430343844, 24.764720254315506, 112.99634044983303, 155.7600140639083], 
"eval_len": [43, 20, 20, 29, 35, 21, 76, 30, 75, 88]}

 72%|███████▏  | 719999/1000000 [10:04:36<3:00:18, 25.88it/s]global step 720000, trans_decision ep_re 23.92222624717788

{"global_step": 720000, "eval_re": [19.387890584380767, 14.191070826386198, 
11.193727207689774, 41.36629089620867, 12.512319879992802, 10.970709804854126, 
12.83138457567318, 84.86463589075763, 16.003383992031267, 15.900848813804366], 
"eval_len": [21, 23, 16, 65, 18, 17, 17, 60, 20, 39]}

 73%|███████▎  | 729999/1000000 [10:13:00<2:50:22, 26.41it/s]global step 730000, trans_decision ep_re 18.663044134830916

{"global_step": 730000, "eval_re": [16.812559132070405, 7.749346239422878, 
22.353408437065468, 10.430605409923366, 24.28964137190913, 21.088798693902753, 
16.134409533033594, 17.67654346419792, 36.55932684016257, 13.535802226621062], 
"eval_len": [19, 11, 24, 24, 28, 43, 19, 21, 52, 16]}

 74%|███████▍  | 739998/1000000 [10:21:25<2:43:42, 26.47it/s]global step 740000, trans_decision ep_re 54.197752448453095

{"global_step": 740000, "eval_re": [80.60513993007072, 19.949307393682787, 
35.27764810890667, 32.90321011987046, 265.26588219803756, 11.657060795158761, 
14.245107127409968, 44.090400752083895, 13.838564393340619, 24.14520366596944], 
"eval_len": [58, 21, 39, 45, 120, 16, 15, 59, 15, 41]}

 75%|███████▍  | 749999/1000000 [10:30:00<2:39:20, 26.15it/s]global step 750000, trans_decision ep_re 34.20733121278802

{"global_step": 750000, "eval_re": [22.533234997408716, 7.7349101860413345, 
19.20207263807709, 15.347183033486626, 29.02139620205661, 29.07833633983188, 
13.833751397087727, 18.65941129997597, 168.9218461378841, 17.741169896030076], 
"eval_len": [24, 19, 23, 21, 24, 28, 17, 20, 86, 28]}

 76%|███████▌  | 759999/1000000 [10:38:15<2:33:30, 26.06it/s]global step 760000, trans_decision ep_re 38.5889956344

{"global_step": 760000, "eval_re": [11.052279605036073, 14.742448742784646, 
119.35954501571214, 9.293011536415788, 10.84016541269292, 14.736172981556985, 
22.181928935169243, 140.48935025640282, 31.971762926720213, 11.223290931509158],
"eval_len": [14, 16, 79, 16, 23, 23, 21, 79, 53, 19]}

 77%|███████▋  | 769999/1000000 [10:46:50<2:26:38, 26.14it/s]global step 770000, trans_decision ep_re 22.1952194507873

{"global_step": 770000, "eval_re": [11.111690694914058, 30.067355129629128, 
61.28845497640871, 10.929585510442262, 16.188395542428943, 8.136374644559854, 
26.757221478158993, 15.543352928958813, 18.00431507328963, 23.925448529082626], 
"eval_len": [23, 34, 70, 18, 19, 12, 26, 20, 18, 24]}

 78%|███████▊  | 779999/1000000 [10:55:04<2:20:50, 26.03it/s]global step 780000, trans_decision ep_re 34.345495332573044

{"global_step": 780000, "eval_re": [15.26327454837001, 8.294250181652028, 
52.08454615312769, 27.89371937820904, 131.70619983655644, 12.575189127973438, 
30.058959996846173, 15.074757404640698, 14.518190213837505, 35.9858664845174], 
"eval_len": [20, 34, 40, 28, 94, 18, 40, 26, 21, 47]}

 79%|███████▉  | 789999/1000000 [11:03:40<2:14:01, 26.11it/s]global step 790000, trans_decision ep_re 48.95700159237403

{"global_step": 790000, "eval_re": [10.795404194149896, 9.345293468097736, 
13.62289083559472, 50.395060059976295, 99.35143104010389, 12.106924725987747, 
62.02676481877512, 16.184440684531094, 199.2544687198874, 16.48733737663638], 
"eval_len": [15, 12, 17, 40, 53, 14, 52, 19, 111, 24]}

 80%|███████▉  | 799997/1000000 [11:11:54<2:07:59, 26.04it/s]global step 800000, trans_decision ep_re 41.74988452657603

{"global_step": 800000, "eval_re": [66.45935787603496, 9.674106683510876, 
21.114185517883843, 64.4594621746468, 12.487416585635433, 135.68017251674175, 
48.12385848858072, 22.106728997660102, 24.835247890824565, 12.55830853424127], 
"eval_len": [42, 21, 23, 63, 17, 94, 55, 26, 26, 17]}

 81%|████████  | 809999/1000000 [11:20:30<1:59:57, 26.40it/s]global step 810000, trans_decision ep_re 49.88123849140655

{"global_step": 810000, "eval_re": [14.456351000026382, 86.6220644641802, 
109.87562541035744, 12.632632865967498, 10.73975266399864, 10.754952833648048, 
11.265368046010128, 146.87287347609515, 69.38127038862517, 26.211493765156767], 
"eval_len": [16, 72, 67, 26, 14, 14, 15, 76, 88, 30]}

 82%|████████▏ | 819999/1000000 [11:29:00<1:55:43, 25.92it/s]global step 820000, trans_decision ep_re 54.83024900387704

{"global_step": 820000, "eval_re": [12.74821834716288, 15.000612867762195, 
17.92353361765746, 17.1158998505482, 211.51229363618071, 136.6644378952797, 
14.077706676247816, 16.294033044580665, 14.179865921828089, 92.78588818152281], 
"eval_len": [17, 18, 20, 18, 115, 91, 22, 18, 21, 76]}

 83%|████████▎ | 829998/1000000 [11:37:17<1:47:26, 26.37it/s]global step 830000, trans_decision ep_re 26.90296784211369

{"global_step": 830000, "eval_re": [14.056989939250562, 11.560343097694798, 
15.223721821393351, 19.53938707849496, 124.31462027732091, 16.816902020505957, 
9.929572111428968, 13.057867190654601, 9.114792349481592, 35.41548253491119], 
"eval_len": [21, 15, 18, 26, 70, 19, 12, 16, 14, 39]}

 84%|████████▍ | 839997/1000000 [11:45:44<1:44:26, 25.53it/s]global step 840000, trans_decision ep_re 30.736996554553862

{"global_step": 840000, "eval_re": [22.990608519423052, 16.845342427375595, 
18.509075440801883, 20.158818490941485, 11.038304943189416, 96.88760040383963, 
11.485647839144733, 44.20752720918061, 42.16648351874221, 23.080556752899987], 
"eval_len": [58, 46, 21, 26, 18, 64, 14, 57, 40, 24]}

 85%|████████▍ | 849997/1000000 [11:54:16<1:36:18, 25.96it/s]global step 850000, trans_decision ep_re 17.06217455750038

{"global_step": 850000, "eval_re": [17.171807003747954, 13.958263168707381, 
10.60404495788195, 17.79560469420516, 22.58463517922021, 20.52730507080299, 
14.819813616209547, 16.93820504546387, 14.599588921226365, 21.622477917538394], 
"eval_len": [22, 16, 23, 26, 36, 30, 18, 24, 23, 27]}

 86%|████████▌ | 859997/1000000 [12:02:43<1:29:20, 26.12it/s]global step 860000, trans_decision ep_re 36.43769691278819

{"global_step": 860000, "eval_re": [16.370178233387758, 8.743118047751869, 
22.504332398621163, 108.76240822987671, 14.849873982874575, 16.097845521895792, 
33.32257266427418, 37.18665568331043, 46.42952338271353, 60.110460983175926], 
"eval_len": [25, 14, 25, 65, 20, 27, 30, 40, 38, 43]}

 87%|████████▋ | 869999/1000000 [12:11:20<1:23:42, 25.88it/s]global step 870000, trans_decision ep_re 21.825959825868342

{"global_step": 870000, "eval_re": [13.654503145138086, 24.798939209037336, 
12.518678097581406, 13.487095017791807, 27.000608048642626, 44.07437557308406, 
13.914576786640675, 18.28794097705568, 17.798280357068318, 32.72460104664341], 
"eval_len": [17, 28, 17, 16, 26, 46, 28, 27, 22, 28]}

 88%|████████▊ | 879999/1000000 [12:19:36<1:16:50, 26.03it/s]global step 880000, trans_decision ep_re 39.41600225840445

{"global_step": 880000, "eval_re": [131.18628165834548, 12.206894350019473, 
26.557003358107927, 84.35841753004823, 10.541760184112734, 10.364874173495714, 
40.89710372927706, 15.711649296734523, 25.97494371077926, 36.36109459312413], 
"eval_len": [68, 17, 25, 57, 18, 13, 46, 18, 34, 34]}

 89%|████████▉ | 889999/1000000 [12:28:04<1:10:14, 26.10it/s]global step 890000, trans_decision ep_re 48.04520456667288

{"global_step": 890000, "eval_re": [89.44828465440477, 11.781226672167955, 
51.07362719888304, 81.19790044860672, 5.492018361133626, 19.417608286553392, 
82.92647784552796, 105.75213531140567, 12.153746463926176, 21.209020424119508], 
"eval_len": [71, 14, 72, 54, 27, 26, 71, 69, 13, 24]}

 90%|████████▉ | 899999/1000000 [12:36:31<1:03:36, 26.20it/s]global step 900000, trans_decision ep_re 26.279533992296457

{"global_step": 900000, "eval_re": [18.201723659802163, 105.7337650701403, 
13.222059938129958, 18.178095799861737, 16.67160829226703, 14.208666896314279, 
19.541534345266502, 15.645079887570592, 15.05350280774341, 26.339303225868598], 
"eval_len": [20, 60, 20, 17, 25, 17, 21, 21, 20, 25]}

 91%|█████████ | 909998/1000000 [12:44:55<56:35, 26.51it/s]global step 910000, trans_decision ep_re 48.25330469859102

{"global_step": 910000, "eval_re": [8.916471807636949, 24.289308824919555, 
13.118956620550762, 16.09151511970912, 13.027016352132682, 39.62560310887378, 
18.84425560617582, 163.85868950532765, 94.2579648337073, 90.50326520687655], 
"eval_len": [11, 25, 22, 20, 19, 50, 21, 81, 79, 71]}

 92%|█████████▏| 919999/1000000 [12:53:30<51:04, 26.11it/s]global step 920000, trans_decision ep_re 28.490768217918884

{"global_step": 920000, "eval_re": [78.92845382178133, 11.81794281558062, 
8.75213610695098, 18.31163371620635, 11.924045050720704, 19.4102681147676, 
83.56472569945792, 27.231321239158756, 11.413762863976741, 13.5533927505878], 
"eval_len": [59, 18, 18, 24, 25, 22, 68, 27, 16, 17]}

 93%|█████████▎| 929997/1000000 [13:01:42<44:25, 26.26it/s]global step 930000, trans_decision ep_re 29.887477428701505

{"global_step": 930000, "eval_re": [24.2155882751594, 14.085674216541276, 
12.92607582632856, 26.237003938154796, 60.139451950553486, 18.390105913752766, 
11.426818166791302, 38.82730486415435, 21.012541482036532, 71.61420965354257], 
"eval_len": [23, 19, 16, 37, 53, 19, 15, 49, 26, 73]}

 94%|█████████▍| 939999/1000000 [13:10:07<38:22, 26.06it/s]global step 940000, trans_decision ep_re 28.662023718997858

{"global_step": 940000, "eval_re": [11.611608592736298, 7.487751253220126, 
12.063215455495262, 14.190039984964102, 17.57158667740349, 43.07749841354585, 
132.60376233219927, 8.972050447718026, 27.649776388385508, 11.392947644310679], 
"eval_len": [14, 14, 19, 17, 19, 39, 75, 13, 39, 18]}

 95%|█████████▍| 949999/1000000 [13:18:33<31:55, 26.10it/s]global step 950000, trans_decision ep_re 34.23413875993069

{"global_step": 950000, "eval_re": [13.263614121331722, 7.135858584732424, 
21.81116883179413, 16.97387715294695, 26.83906428098812, 6.526671215355293, 
20.320608073333176, 144.3163751562399, 8.261488289164225, 76.89266189342094], 
"eval_len": [23, 11, 25, 25, 24, 14, 19, 82, 18, 73]}

 96%|█████████▌| 959999/1000000 [13:27:10<25:38, 26.00it/s]global step 960000, trans_decision ep_re 18.68864882723519

{"global_step": 960000, "eval_re": [15.199906530778462, 9.936826210398433, 
20.44899646474302, 16.086503467472934, 14.211932801705947, 39.720059748669776, 
11.646687374787504, 18.473362771669844, 11.947543896179338, 29.214669005946682],
"eval_len": [26, 12, 21, 29, 19, 40, 14, 26, 15, 29]}

 97%|█████████▋| 969998/1000000 [13:35:22<18:49, 26.55it/s]global step 970000, trans_decision ep_re 44.409092589231435

{"global_step": 970000, "eval_re": [15.281693678752985, 13.51434131583446, 
10.028876589372661, 15.966345273039519, 134.15577886429452, 14.982797594555006, 
16.303005118770063, 128.55232725203157, 13.363031884297692, 81.94272832136586], 
"eval_len": [18, 16, 23, 20, 88, 22, 20, 85, 15, 65]}

 98%|█████████▊| 979997/1000000 [13:43:45<12:37, 26.40it/s]global step 980000, trans_decision ep_re 30.391155684356182

{"global_step": 980000, "eval_re": [9.998612671252495, 9.648898198007442, 
14.414388342442095, 17.783058560015697, 32.2535567189678, 31.22930465895103, 
78.29312210520536, 17.105235237861177, 25.502989814163072, 67.68239053669564], 
"eval_len": [12, 17, 20, 25, 30, 27, 109, 30, 27, 43]}

 99%|█████████▉| 989999/1000000 [13:52:20<06:24, 26.01it/s]global step 990000, trans_decision ep_re 66.40518382142726

{"global_step": 990000, "eval_re": [156.46321629190288, 86.44088547445203, 
83.17193140775373, 14.756232368187206, 23.81582095838562, 14.472749989631579, 
68.15753006764967, 69.70912248440106, 19.294292498410616, 127.77005667349817], 
"eval_len": [96, 68, 73, 27, 25, 16, 56, 45, 23, 79]}

100%|█████████▉| 999997/1000000 [14:00:33<00:00, 26.39it/s]global step 1000000, trans_decision ep_re 38.838003140306384

{"global_step": 1000000, "eval_re": [8.296011074589837, 17.10217019716592, 
16.688417378962363, 110.45444888934404, 157.41778198038645, 12.051592071549173, 
15.553111844039561, 28.572676899846112, 11.489873090100353, 10.753947977080015],
"eval_len": [12, 21, 17, 67, 85, 27, 22, 25, 21, 19]}

100%|██████████| 1000000/1000000 [14:00:47<00:00, 19.82it/s]
