
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.15
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [05:20<12:21:16, 22.26it/s]global step 10000, trans_decision ep_re 241.15358359198035

{"global_step": 10000, "eval_re": [475.00078223846884, 151.4705629329655, 
129.9053707802526, 197.7885836905442, 320.85264516373144, 130.76402571143234, 
403.95780361039857, 141.28028450481807, 101.3186339428249, 359.19714334436696], 
"eval_len": [100, 29, 25, 37, 59, 25, 75, 27, 20, 68]}

  2%|▏         | 19999/1000000 [15:30<12:09:32, 22.39it/s]global step 20000, trans_decision ep_re 205.65328664526038

{"global_step": 20000, "eval_re": [103.75043969230255, 89.67536557800705, 
122.44141682910129, 151.635384410375, 97.06296239964603, 305.6588291256868, 
120.8861147565295, 389.3803983761351, 384.2895748986354, 291.75238038618494], 
"eval_len": [20, 18, 24, 29, 19, 59, 23, 75, 72, 56]}

  3%|▎         | 29997/1000000 [25:32<12:16:06, 21.96it/s]global step 30000, trans_decision ep_re 166.62122356419172

{"global_step": 30000, "eval_re": [113.76916193922713, 191.99780738226357, 
198.18160158777138, 89.69621655904298, 277.28960697963583, 127.19106511108077, 
141.722628293013, 213.2648429482643, 194.50863339914105, 118.5906714424772], 
"eval_len": [22, 37, 41, 18, 52, 25, 27, 44, 37, 23]}

  4%|▍         | 39999/1000000 [36:00<11:57:42, 22.29it/s]global step 40000, trans_decision ep_re 141.25617294002086

{"global_step": 40000, "eval_re": [153.9379400472258, 149.5756027650753, 
155.08920761974565, 113.60937001139722, 97.24291830053566, 113.5158905890457, 
271.28770691510005, 107.69866928603776, 119.91384314008822, 130.69058072595735],
"eval_len": [30, 29, 30, 22, 19, 22, 53, 21, 23, 25]}

  5%|▍         | 49999/1000000 [46:10<12:01:44, 21.94it/s]global step 50000, trans_decision ep_re 166.23667043339793

{"global_step": 50000, "eval_re": [130.99409121819886, 96.34298242278821, 
370.32475066575523, 140.12509243902386, 171.98739488364123, 120.81262378312273, 
208.301984148121, 150.37958584032026, 134.0235261815066, 139.07467275150148], 
"eval_len": [25, 19, 67, 27, 33, 23, 43, 29, 26, 27]}

  6%|▌         | 59999/1000000 [56:20<11:49:53, 22.07it/s]global step 60000, trans_decision ep_re 178.24525131666795

{"global_step": 60000, "eval_re": [188.86784936699385, 295.71539782869127, 
191.37044234146313, 97.07541934163655, 117.8431278658896, 383.29294544923823, 
155.4233750361722, 119.81794354159857, 113.84584535857411, 119.20016703642204], 
"eval_len": [36, 56, 37, 19, 23, 69, 30, 23, 22, 23]}

  7%|▋         | 69998/1000000 [1:06:22<11:38:58, 22.18it/s]global step 70000, trans_decision ep_re 149.41356887455078

{"global_step": 70000, "eval_re": [336.368845736557, 119.92034088991652, 
118.81228547230623, 200.82428451390658, 118.91389912122041, 107.2353990373195, 
112.31928608029651, 112.60956551338379, 131.01521150692528, 136.11657087367587],
"eval_len": [62, 23, 23, 39, 23, 21, 22, 22, 25, 26]}

  8%|▊         | 79999/1000000 [1:16:50<11:33:06, 22.12it/s]global step 80000, trans_decision ep_re 141.70412419734245

{"global_step": 80000, "eval_re": [102.54099935188168, 108.051597660443, 
97.01479129522924, 151.19892295644422, 107.30142712387192, 155.8462561806352, 
161.0235503861469, 110.6722075033692, 314.853725838791, 108.53776367661217], 
"eval_len": [20, 21, 19, 29, 21, 30, 31, 22, 61, 21]}

  9%|▉         | 89999/1000000 [1:27:00<11:29:47, 21.99it/s]global step 90000, trans_decision ep_re 171.35810267244355

{"global_step": 90000, "eval_re": [176.3443309463078, 125.0994102872336, 
107.14580546128018, 103.04910181846012, 174.8614867830514, 165.7209849196406, 
160.89470480155282, 120.27812021290761, 214.60841299071666, 365.5786685032848], 
"eval_len": [34, 24, 21, 20, 33, 32, 31, 23, 41, 65]}

 10%|▉         | 99997/1000000 [1:37:02<11:21:25, 22.01it/s]global step 100000, trans_decision ep_re 208.53138244287942

{"global_step": 100000, "eval_re": [96.38079525614029, 154.21216338507892, 
120.6605802834592, 154.32105085568764, 360.292244404658, 517.5273981701097, 
113.20992995211243, 124.74287752252266, 329.53402738273064, 114.4327572162947], 
"eval_len": [19, 30, 23, 31, 70, 96, 22, 24, 66, 22]}

 11%|█         | 109999/1000000 [1:47:30<11:10:37, 22.12it/s]global step 110000, trans_decision ep_re 192.12421237496602

{"global_step": 110000, "eval_re": [242.46471392652018, 385.0496720808789, 
125.81137296193664, 161.74775495916208, 434.96711746264407, 97.75110053863474, 
123.85343724875685, 114.01111143076525, 108.82316976535775, 126.76267337500397],
"eval_len": [46, 71, 24, 31, 81, 19, 24, 22, 21, 25]}

 12%|█▏        | 119997/1000000 [1:57:50<11:00:32, 22.20it/s]global step 120000, trans_decision ep_re 195.67262735496797

{"global_step": 120000, "eval_re": [139.81254982029023, 90.87568855954699, 
113.62762305864287, 219.0170865894245, 326.58521108562695, 314.9538726271476, 
136.27930559880014, 117.97006699632283, 389.41092958368483, 108.19393963019269],
"eval_len": [27, 18, 22, 41, 62, 59, 26, 23, 70, 21]}

 13%|█▎        | 129999/1000000 [2:08:00<11:04:35, 21.82it/s]global step 130000, trans_decision ep_re 174.0943169332829

{"global_step": 130000, "eval_re": [244.31099321856226, 135.44533123582084, 
113.9443823666295, 366.3542507420408, 193.5258276439751, 167.2621327786042, 
103.02402806447402, 130.49606431871368, 114.12158176825746, 172.4585771957508], 
"eval_len": [46, 26, 22, 68, 37, 32, 20, 25, 22, 33]}

 14%|█▍        | 139999/1000000 [2:18:01<10:44:32, 22.24it/s]global step 140000, trans_decision ep_re 213.46977465692788

{"global_step": 140000, "eval_re": [161.27420283622953, 205.27986214973023, 
350.06339338380235, 366.1482738506095, 178.4805097941162, 134.87108005722658, 
129.3958620774507, 161.6112118081461, 357.79072910286186, 89.78262150910584], 
"eval_len": [31, 39, 65, 67, 34, 26, 25, 31, 68, 18]}

 15%|█▍        | 149999/1000000 [2:28:30<10:51:14, 21.75it/s]global step 150000, trans_decision ep_re 141.7775299460803

{"global_step": 150000, "eval_re": [125.18285998841338, 153.49174550540206, 
119.99075151630925, 161.40552675089765, 141.74766933218018, 173.0821029379089, 
146.09042252957545, 173.90588534112297, 90.19650590713856, 132.68182965185497], 
"eval_len": [24, 30, 23, 31, 28, 33, 28, 33, 18, 26]}

 16%|█▌        | 159999/1000000 [2:38:40<10:50:03, 21.54it/s]global step 160000, trans_decision ep_re 216.07675725107865

{"global_step": 160000, "eval_re": [180.68189649890476, 126.06819894688125, 
157.36964349656827, 142.8658726664438, 348.3172675082154, 114.99273815006899, 
296.7939395095851, 150.58436700243274, 336.6717862984353, 306.4218624332509], 
"eval_len": [35, 24, 30, 28, 64, 22, 57, 29, 63, 58]}

 17%|█▋        | 169999/1000000 [2:48:50<10:46:52, 21.38it/s]global step 170000, trans_decision ep_re 236.19901073086763

{"global_step": 170000, "eval_re": [229.52312725470307, 229.267623050345, 
414.2571037107489, 102.23492518705417, 146.63868501499925, 140.00999125378877, 
509.36261881941977, 370.878152587642, 90.2306770066824, 129.58720342329312], 
"eval_len": [42, 43, 76, 20, 28, 27, 104, 68, 18, 25]}

 18%|█▊        | 179998/1000000 [2:58:51<10:21:39, 21.98it/s]global step 180000, trans_decision ep_re 199.105800960187

{"global_step": 180000, "eval_re": [374.47928495041725, 135.62176760863997, 
323.337737908272, 162.68035579483868, 235.48875326341042, 180.32264949981385, 
196.20637383098006, 134.62797271768474, 151.3911458262241, 96.90196820158884], 
"eval_len": [68, 26, 61, 31, 45, 35, 37, 26, 29, 19]}

 19%|█▉        | 189999/1000000 [3:09:20<10:27:56, 21.50it/s]global step 190000, trans_decision ep_re 199.57242149806268

{"global_step": 190000, "eval_re": [148.7703451523812, 144.92094041060244, 
108.57893277971918, 129.39719216942214, 184.03733830135755, 394.61607188651203, 
215.5058008276254, 406.7573250966077, 166.683673839958, 96.4565945164413], 
"eval_len": [29, 28, 21, 25, 35, 74, 41, 75, 32, 19]}

 20%|█▉        | 199999/1000000 [3:19:30<10:08:52, 21.90it/s]global step 200000, trans_decision ep_re 217.00359461882027

{"global_step": 200000, "eval_re": [362.55347039925175, 132.11814381019124, 
90.78669169920774, 486.706990566969, 124.83722008433375, 102.850323703477, 
206.59769072255244, 218.35572529844092, 257.32954358561835, 187.9001463181602], 
"eval_len": [66, 26, 18, 92, 24, 20, 40, 41, 53, 36]}

 21%|██        | 209999/1000000 [3:29:40<9:46:53, 22.43it/s]global step 210000, trans_decision ep_re 162.07259649351568

{"global_step": 210000, "eval_re": [131.61365680974785, 123.8078960773912, 
156.22424406424338, 141.07767553534504, 120.17040487920153, 102.8458919511471, 
140.30212992046157, 347.9729276492671, 120.30752441821492, 236.4036136301372], 
"eval_len": [25, 24, 30, 27, 23, 20, 27, 63, 23, 48]}

 22%|██▏       | 219999/1000000 [3:39:40<9:52:25, 21.94it/s]global step 220000, trans_decision ep_re 228.96862562528767

{"global_step": 220000, "eval_re": [102.31652707647817, 289.2648258925521, 
358.6513528818492, 102.08699041124481, 114.21546469295133, 413.37665622163513, 
151.16620911838788, 124.76143112621139, 314.59850864213684, 319.24829018942995],
"eval_len": [20, 57, 66, 20, 22, 76, 29, 24, 61, 58]}

 23%|██▎       | 229997/1000000 [3:50:10<9:55:56, 21.53it/s]global step 230000, trans_decision ep_re 241.21072630531307

{"global_step": 230000, "eval_re": [125.39713829784264, 113.67517059811676, 
436.4067856016015, 434.42637940266536, 280.4819435610682, 342.09733962889504, 
114.0881638954701, 96.57840379615057, 335.30452081629363, 133.65141745502714], 
"eval_len": [24, 22, 82, 80, 55, 65, 22, 19, 64, 26]}

 24%|██▍       | 239998/1000000 [4:00:20<9:26:16, 22.37it/s]global step 240000, trans_decision ep_re 266.3943045902913

{"global_step": 240000, "eval_re": [171.65191842969898, 412.3626513851556, 
330.20249033972425, 369.32195061969986, 218.84572693430212, 395.5663860284116, 
364.23767238141085, 119.4502325838139, 119.52102303113159, 162.7829941695639], 
"eval_len": [33, 78, 61, 68, 42, 74, 67, 23, 23, 31]}

 25%|██▍       | 249997/1000000 [4:10:30<9:23:23, 22.19it/s]global step 250000, trans_decision ep_re 196.81921470026472

{"global_step": 250000, "eval_re": [161.46135927580707, 125.62679194689484, 
387.7661335408682, 180.9612130312155, 445.2057025002428, 125.39038212644867, 
122.20838053383943, 186.68799974290258, 97.38410152312474, 135.5000827813036], 
"eval_len": [31, 24, 73, 34, 81, 24, 24, 36, 19, 26]}

 26%|██▌       | 259999/1000000 [4:20:40<9:22:47, 21.91it/s]global step 260000, trans_decision ep_re 160.28803948730067

{"global_step": 260000, "eval_re": [119.66537224866092, 118.77639012080155, 
154.18059234618732, 108.6383399920618, 136.2320588174246, 461.34800875092935, 
113.70506843622137, 134.51271193497382, 119.78722740692645, 136.0346248188196], 
"eval_len": [23, 23, 30, 21, 26, 90, 22, 26, 23, 26]}

 27%|██▋       | 269999/1000000 [4:30:50<9:11:40, 22.05it/s]global step 270000, trans_decision ep_re 210.25832564532158

{"global_step": 270000, "eval_re": [389.4084732824427, 189.89430711576713, 
118.44630080881252, 155.50604068092125, 96.45973608208597, 156.32783406967175, 
102.55777391790659, 394.02044972395197, 380.88552760164544, 119.07681317001035],
"eval_len": [70, 36, 23, 30, 19, 30, 20, 74, 71, 23]}

 28%|██▊       | 279999/1000000 [4:40:43<9:10:16, 21.81it/s]global step 280000, trans_decision ep_re 186.181879382805

{"global_step": 280000, "eval_re": [108.37832902835449, 96.84801100528004, 
384.596944396311, 119.5839580771163, 220.4547605374929, 125.00093798634916, 
140.01293035777672, 315.63940094794583, 124.79995053528062, 226.503570956143], 
"eval_len": [21, 19, 70, 23, 42, 24, 27, 59, 24, 43]}

 29%|██▉       | 289999/1000000 [4:50:52<9:04:32, 21.73it/s]global step 290000, trans_decision ep_re 207.30562228421508

{"global_step": 290000, "eval_re": [361.4342194225114, 139.64220107120696, 
129.1827730039403, 384.5473777919268, 118.63985854809178, 146.58997719218755, 
129.9873329893564, 206.60773376551677, 325.824161273867, 130.60058778354608], 
"eval_len": [65, 27, 25, 71, 23, 28, 25, 39, 63, 25]}

 30%|██▉       | 299999/1000000 [5:01:20<8:54:06, 21.84it/s]global step 300000, trans_decision ep_re 255.65567617788042

{"global_step": 300000, "eval_re": [400.23982951538625, 169.69288596625455, 
136.04702412703483, 534.6896078217685, 103.52605851220947, 428.9535942473369, 
128.84898973150825, 118.42837949066028, 147.41536370953162, 388.7150286571138], 
"eval_len": [73, 32, 26, 97, 20, 81, 25, 23, 29, 82]}

 31%|███       | 309998/1000000 [5:11:30<8:34:58, 22.33it/s]global step 310000, trans_decision ep_re 164.10726220168357

{"global_step": 310000, "eval_re": [142.94027709073066, 122.70680170490819, 
113.805722127765, 373.48699310296405, 276.2481064788905, 130.95999578411602, 
129.2362209034569, 113.27037691878937, 119.71263962150255, 118.70548828371246], 
"eval_len": [28, 24, 22, 70, 52, 25, 25, 22, 23, 23]}

 32%|███▏      | 319999/1000000 [5:21:40<8:27:36, 22.33it/s]global step 320000, trans_decision ep_re 170.2617330525631

{"global_step": 320000, "eval_re": [112.80599176417587, 140.5633511536255, 
190.55828788380086, 114.09728954208532, 114.03452189115507, 365.03068384342527, 
166.67983009909835, 119.64822014838427, 144.15201528434136, 235.047138915539], 
"eval_len": [22, 27, 37, 22, 22, 66, 32, 23, 28, 44]}

 33%|███▎      | 329999/1000000 [5:31:33<8:30:35, 21.87it/s]global step 330000, trans_decision ep_re 161.5282559444293

{"global_step": 330000, "eval_re": [119.64849475806955, 135.1255162363481, 
207.17092460890174, 120.43306325999362, 123.5069254843564, 126.16803303876301, 
383.2707641132906, 119.32533177803636, 128.31227749838678, 152.32122866814683], 
"eval_len": [23, 26, 39, 23, 24, 24, 71, 23, 25, 29]}

 34%|███▍      | 339999/1000000 [5:42:00<8:19:17, 22.03it/s]global step 340000, trans_decision ep_re 152.02686142838417

{"global_step": 340000, "eval_re": [138.46929981941545, 345.5706154060627, 
124.09307848736873, 177.9002140218034, 136.2499310437781, 114.0652957623873, 
84.65121608898319, 155.7146402296728, 103.13957185709168, 140.41475156727844], 
"eval_len": [27, 67, 24, 33, 26, 22, 17, 30, 20, 27]}

 35%|███▍      | 349998/1000000 [5:52:10<8:08:18, 22.19it/s]global step 350000, trans_decision ep_re 185.27653246812253

{"global_step": 350000, "eval_re": [156.26963284630796, 102.25578858355055, 
192.25247074952318, 341.3716523581882, 124.26036615340476, 279.76071285325565, 
108.16803930985135, 89.81325782802026, 350.13857709432193, 108.4748269048017], 
"eval_len": [30, 20, 37, 61, 24, 55, 21, 18, 65, 21]}

 36%|███▌      | 359998/1000000 [6:02:20<8:06:36, 21.92it/s]global step 360000, trans_decision ep_re 235.51505207741099

{"global_step": 360000, "eval_re": [186.23003833142803, 199.01521444938334, 
512.5330498247505, 117.62028327742551, 97.12468547238312, 102.60504318075571, 
431.95534004422876, 130.0037206582284, 152.67640587070673, 425.3867396648199], 
"eval_len": [36, 38, 94, 23, 19, 20, 80, 25, 29, 80]}

 37%|███▋      | 369998/1000000 [6:12:30<7:53:29, 22.18it/s]global step 370000, trans_decision ep_re 207.57463943475577

{"global_step": 370000, "eval_re": [330.95337939367613, 102.332665475728, 
343.713243528291, 290.0722986510591, 144.6905389682926, 129.80091755459165, 
340.0736166042877, 120.30595890765375, 124.52763717812007, 149.27613808585738], 
"eval_len": [60, 20, 64, 54, 28, 25, 66, 23, 24, 29]}

 38%|███▊      | 379997/1000000 [6:22:40<7:48:14, 22.07it/s]global step 380000, trans_decision ep_re 186.44592644872915

{"global_step": 380000, "eval_re": [384.18873593248986, 140.99679184504922, 
261.847138430308, 214.2430517172002, 112.04403319383619, 135.9472816546005, 
151.77864198347507, 107.23426281333883, 196.7498758143149, 159.42945110267874], 
"eval_len": [74, 27, 49, 40, 22, 27, 29, 21, 38, 31]}

 39%|███▉      | 389997/1000000 [6:32:50<7:30:11, 22.58it/s]global step 390000, trans_decision ep_re 199.73032693012055

{"global_step": 390000, "eval_re": [234.15827688041136, 294.63499807185656, 
305.2813088197143, 108.37042810997067, 349.629512519468, 123.58537606086026, 
96.03892407138625, 237.7450971640661, 123.02984678509179, 124.82950081838028], 
"eval_len": [45, 59, 59, 21, 64, 24, 19, 45, 24, 24]}

 40%|███▉      | 399997/1000000 [6:43:00<7:25:07, 22.47it/s]global step 400000, trans_decision ep_re 214.42852711787782

{"global_step": 400000, "eval_re": [326.05849055128954, 347.73479513123925, 
354.8736213297202, 130.40205268289236, 332.96551632147066, 134.22913702292223, 
96.81694828452784, 206.6103276696162, 96.22219278046224, 118.37218940463784], 
"eval_len": [64, 64, 67, 25, 63, 26, 19, 39, 19, 23]}

 41%|████      | 409999/1000000 [6:53:10<7:32:45, 21.72it/s]global step 410000, trans_decision ep_re 155.71662147734062

{"global_step": 410000, "eval_re": [96.6522575695005, 159.10581224355073, 
161.97283308933663, 129.63516407850003, 90.80594375628289, 453.0428253664108, 
114.31647815875186, 125.08112447105616, 118.10454817947127, 108.44922786054522],
"eval_len": [19, 30, 31, 25, 18, 81, 22, 24, 23, 21]}

 42%|████▏     | 419999/1000000 [7:03:20<7:10:26, 22.46it/s]global step 420000, trans_decision ep_re 167.1916824233695

{"global_step": 420000, "eval_re": [318.1472614029792, 128.12092326132708, 
96.4998327867385, 129.55179925584955, 148.6854264157174, 125.19665858816654, 
356.1036172515063, 160.39300919671476, 91.54689724517118, 117.67139882952459], 
"eval_len": [59, 25, 19, 25, 29, 24, 66, 31, 18, 23]}

 43%|████▎     | 429997/1000000 [7:13:30<7:15:33, 21.81it/s]global step 430000, trans_decision ep_re 135.87447092927562

{"global_step": 430000, "eval_re": [108.44556322875414, 129.89991955848038, 
90.77963522316487, 90.58214323709012, 95.99381074997875, 122.23525579980812, 
102.40146228614844, 108.48694871636664, 149.71200466535188, 360.2079658276128], 
"eval_len": [21, 25, 18, 18, 19, 24, 20, 21, 29, 68]}

 44%|████▍     | 439999/1000000 [7:23:40<7:02:16, 22.10it/s]global step 440000, trans_decision ep_re 208.18468294501167

{"global_step": 440000, "eval_re": [113.94365382310806, 208.1692079305, 
327.37520372202226, 167.0400174296567, 381.79383912306014, 151.40872123204838, 
359.18174692879717, 129.49050371456, 108.39840884315085, 135.04552670321317], 
"eval_len": [22, 41, 61, 32, 71, 29, 67, 25, 21, 26]}

 45%|████▍     | 449998/1000000 [7:33:50<6:57:58, 21.93it/s]global step 450000, trans_decision ep_re 148.93444326395223

{"global_step": 450000, "eval_re": [118.34578882346216, 84.63466824592909, 
141.7595782745796, 122.96454233134932, 135.64041112782022, 128.4194332136823, 
190.09681510654744, 119.2500424802771, 334.5092217534642, 113.72393128241089], 
"eval_len": [23, 17, 27, 24, 26, 25, 36, 23, 66, 22]}

 46%|████▌     | 459999/1000000 [7:44:00<6:59:14, 21.47it/s]global step 460000, trans_decision ep_re 186.46127017794333

{"global_step": 460000, "eval_re": [118.54394145848659, 161.2369639508162, 
413.3267989956111, 149.65940580117842, 144.65531390107634, 111.40070931282301, 
142.23514911173973, 112.48845948531049, 344.8098021382261, 166.2561576241652], 
"eval_len": [23, 31, 74, 29, 28, 22, 28, 22, 62, 32]}

 47%|████▋     | 469999/1000000 [7:54:10<6:41:57, 21.98it/s]global step 470000, trans_decision ep_re 192.0729820839482

{"global_step": 470000, "eval_re": [118.94425775874785, 91.01440830969275, 
354.6077556713149, 141.71811045488923, 161.6276758483791, 393.6282376025677, 
300.13903930319015, 119.44007302353694, 119.1044153738104, 120.50584749335329], 
"eval_len": [23, 18, 70, 27, 31, 76, 58, 23, 23, 23]}

 48%|████▊     | 479999/1000000 [8:04:20<6:32:41, 22.07it/s]global step 480000, trans_decision ep_re 207.64500086981258

{"global_step": 480000, "eval_re": [114.5481484103361, 206.01648822312217, 
341.6527412591224, 455.7819293907221, 118.78721414726083, 113.12590669842625, 
119.4659523061491, 372.2287412735386, 109.0029316620728, 125.83995532737556], 
"eval_len": [22, 39, 63, 84, 23, 22, 23, 69, 21, 25]}

 49%|████▉     | 489999/1000000 [8:14:30<6:21:44, 22.27it/s]global step 490000, trans_decision ep_re 191.2535358422763

{"global_step": 490000, "eval_re": [412.19309585530664, 141.64588401947003, 
114.67819670269016, 136.54741736455298, 127.79705120089004, 156.04249431747522, 
159.1741491186176, 402.3289166362668, 136.1608150066554, 125.96733820083827], 
"eval_len": [75, 27, 22, 26, 25, 30, 30, 72, 26, 24]}

 50%|████▉     | 499999/1000000 [8:24:40<6:24:33, 21.67it/s]global step 500000, trans_decision ep_re 201.95257886240628

{"global_step": 500000, "eval_re": [128.00935698742427, 144.63293080983172, 
126.62709552382026, 151.4850060559907, 397.0984681667541, 180.73572774864004, 
136.59701513044877, 182.1363471423369, 135.54046655778592, 436.66337450103015], 
"eval_len": [25, 28, 25, 29, 75, 34, 26, 35, 26, 81]}

 51%|█████     | 509999/1000000 [8:34:50<6:00:14, 22.67it/s]global step 510000, trans_decision ep_re 128.6871613978321

{"global_step": 510000, "eval_re": [157.9985778845408, 96.89650946480481, 
139.1364352362647, 122.63395568576966, 96.93742781679502, 135.47912157478996, 
156.90909027584556, 117.72853761903748, 139.58067677176626, 123.57128164870696],
"eval_len": [30, 19, 27, 24, 19, 27, 30, 23, 27, 24]}

 52%|█████▏    | 519999/1000000 [8:45:00<6:11:25, 21.54it/s]global step 520000, trans_decision ep_re 161.9349335359297

{"global_step": 520000, "eval_re": [141.21249209938787, 117.68300681766924, 
383.0307895717845, 152.26285158384735, 193.2174204039498, 162.41265896115237, 
112.38437793196164, 124.71293549919683, 129.81099980419276, 102.62180268615475],
"eval_len": [28, 23, 71, 29, 37, 31, 22, 24, 25, 20]}

 53%|█████▎    | 529999/1000000 [8:55:10<6:02:03, 21.64it/s]global step 530000, trans_decision ep_re 194.6495474081148

{"global_step": 530000, "eval_re": [129.88166394340112, 458.3510768475981, 
488.6838276371601, 124.26689339989802, 126.03015517573489, 157.75648202421146, 
109.69679962670367, 108.51736437869303, 102.50801267162144, 140.80319837612632],
"eval_len": [25, 86, 107, 24, 24, 30, 21, 21, 20, 27]}

 54%|█████▍    | 539999/1000000 [9:05:20<5:47:00, 22.09it/s]global step 540000, trans_decision ep_re 194.07915417640504

{"global_step": 540000, "eval_re": [135.75617824056556, 118.52282908511432, 
259.44750218082663, 120.00501812913392, 237.26852417923672, 288.552264063727, 
185.65472982792926, 108.06229203247511, 363.190455826429, 124.33174819861259], 
"eval_len": [26, 23, 49, 23, 45, 59, 36, 21, 69, 24]}

 55%|█████▍    | 549999/1000000 [9:15:30<5:46:26, 21.65it/s]global step 550000, trans_decision ep_re 219.7097499441823

{"global_step": 550000, "eval_re": [219.6305064268042, 124.63970511328301, 
447.0549283424229, 90.33075736323389, 441.7540103803768, 139.03556532662523, 
152.22741140512758, 108.22333679813832, 96.16445988305446, 378.03681840275635], 
"eval_len": [42, 24, 84, 18, 79, 27, 29, 21, 19, 69]}

 56%|█████▌    | 559998/1000000 [9:25:31<5:31:48, 22.10it/s]global step 560000, trans_decision ep_re 216.9301915772522

{"global_step": 560000, "eval_re": [119.64817722623333, 295.66985874955395, 
89.77589717979086, 416.46763220487435, 141.9400291864117, 103.50823266652067, 
325.0887029477797, 160.09519924568124, 137.6904501571874, 379.4177362084888], 
"eval_len": [23, 57, 18, 78, 27, 20, 61, 31, 27, 70]}

 57%|█████▋    | 569998/1000000 [9:36:00<5:18:26, 22.51it/s]global step 570000, trans_decision ep_re 152.65061342922334

{"global_step": 570000, "eval_re": [96.85045570818104, 112.5621211919393, 
386.0959301049017, 115.08070233328714, 115.05978066683733, 91.12297888970198, 
280.9341430717841, 102.17059320876044, 136.83587253038127, 89.79355658645916], 
"eval_len": [19, 22, 72, 22, 22, 18, 51, 20, 26, 18]}

 58%|█████▊    | 579999/1000000 [9:45:53<5:11:10, 22.50it/s]global step 580000, trans_decision ep_re 169.1166863801399

{"global_step": 580000, "eval_re": [128.60631523306515, 129.9027396848504, 
344.0235666268837, 108.80128804259752, 124.92389525254903, 136.10635182181056, 
124.74615412800328, 102.51026876964865, 382.3515731530209, 109.1947110889696], 
"eval_len": [25, 25, 64, 21, 24, 26, 24, 20, 69, 21]}

 59%|█████▉    | 589997/1000000 [9:56:02<5:14:50, 21.70it/s]global step 590000, trans_decision ep_re 169.62643213749416

{"global_step": 590000, "eval_re": [102.51420521118841, 139.78282681270426, 
90.00466594129509, 251.16703579425803, 145.60843951673232, 138.49066911520654, 
378.1071411865278, 186.05950599789765, 108.9057483340117, 155.6240834651198], 
"eval_len": [20, 27, 18, 48, 28, 27, 71, 36, 21, 30]}

 60%|█████▉    | 599998/1000000 [10:06:11<4:56:03, 22.52it/s]global step 600000, trans_decision ep_re 164.84142746111257

{"global_step": 600000, "eval_re": [122.77298184320985, 147.38570948701883, 
119.08905450909123, 292.35575723408476, 118.57899106088108, 286.58664209860865, 
168.46003812707715, 128.5034252004157, 139.93792348366495, 124.7437515670735], 
"eval_len": [24, 28, 23, 56, 23, 56, 32, 25, 27, 24]}

 61%|██████    | 609999/1000000 [10:16:21<4:53:58, 22.11it/s]global step 610000, trans_decision ep_re 241.8167456997864

{"global_step": 610000, "eval_re": [169.31805108212424, 955.0751639162407, 
175.99471826661016, 140.28030223319027, 120.09699213282515, 96.9441168139067, 
348.6775777019837, 150.27342606383456, 108.84763967396319, 152.65946911318483], 
"eval_len": [32, 180, 34, 27, 23, 19, 65, 29, 21, 29]}

 62%|██████▏   | 619998/1000000 [10:26:32<4:49:02, 21.91it/s]global step 620000, trans_decision ep_re 254.5081167482686

{"global_step": 620000, "eval_re": [114.46278411685797, 567.0155514917324, 
390.05563380707065, 162.0706831240296, 353.1484260356304, 178.5513368314688, 
129.30091749019186, 327.71830856732646, 158.2009132171554, 164.55661280122303], 
"eval_len": [22, 113, 75, 31, 66, 34, 25, 61, 31, 32]}

 63%|██████▎   | 629997/1000000 [10:36:43<4:35:43, 22.37it/s]global step 630000, trans_decision ep_re 147.225506907593

{"global_step": 630000, "eval_re": [119.86139395430617, 103.33753441518279, 
191.85109732862026, 125.8680278079676, 112.13681477817475, 96.87377658163852, 
111.43957288546585, 146.60940927099787, 123.81822330837109, 340.459218745205], 
"eval_len": [23, 20, 37, 24, 22, 19, 22, 29, 24, 63]}

 64%|██████▍   | 639997/1000000 [10:46:53<4:32:03, 22.05it/s]global step 640000, trans_decision ep_re 188.55426985751967

{"global_step": 640000, "eval_re": [114.14267157434523, 418.12967069637125, 
119.24961408883047, 114.86488829087392, 126.11575138864222, 285.10831029551446, 
366.257514325878, 113.35691378292746, 125.04875551307937, 103.26860861873429], 
"eval_len": [22, 80, 23, 22, 24, 55, 68, 22, 24, 20]}

 65%|██████▍   | 649999/1000000 [10:57:03<4:22:36, 22.21it/s]global step 650000, trans_decision ep_re 161.37363614659088

{"global_step": 650000, "eval_re": [129.94236600605527, 148.16124455016748, 
147.48262764598914, 184.59495129850305, 136.44722769346555, 310.8488896626959, 
154.9521506483982, 157.63847421896023, 108.24086500523494, 135.4275647364391], 
"eval_len": [25, 29, 29, 35, 26, 62, 30, 31, 21, 26]}

 66%|██████▌   | 659999/1000000 [11:07:30<4:19:47, 21.81it/s]global step 660000, trans_decision ep_re 197.45998888753226

{"global_step": 660000, "eval_re": [153.09222980040414, 125.12504655290569, 
185.06911130779176, 267.1704047955019, 426.7446835980126, 156.23828802799648, 
249.1800173640425, 125.25420026191127, 152.5349718297017, 134.19093533705444], 
"eval_len": [29, 24, 35, 54, 77, 30, 50, 24, 29, 26]}

 67%|██████▋   | 669999/1000000 [11:17:40<4:10:46, 21.93it/s]global step 670000, trans_decision ep_re 204.1409564212968

{"global_step": 670000, "eval_re": [314.53146790242624, 108.63847713148449, 
182.6924742793271, 254.36142045958536, 97.06527788925972, 90.87568150901129, 
114.93015375179205, 379.49993386795853, 145.7754037407158, 353.03927368140756], 
"eval_len": [61, 21, 36, 47, 19, 18, 22, 67, 28, 64]}

 68%|██████▊   | 679999/1000000 [11:27:50<4:02:12, 22.02it/s]global step 680000, trans_decision ep_re 252.42916090045338

{"global_step": 680000, "eval_re": [300.16754820661833, 195.91733661316397, 
165.3009776456906, 147.71749971985562, 152.1744219007194, 476.8704442370365, 
126.49173125105041, 409.3289726939112, 379.1142297075784, 171.2084470289094], 
"eval_len": [59, 37, 32, 29, 29, 93, 25, 78, 73, 33]}

 69%|██████▉   | 689999/1000000 [11:38:00<3:57:23, 21.76it/s]global step 690000, trans_decision ep_re 227.72759755449684

{"global_step": 690000, "eval_re": [289.547622372195, 354.68065609662705, 
140.36869366276483, 146.73391221422136, 145.55317102468612, 159.43409238451105, 
321.33754988931423, 129.17698124421642, 439.8934436523784, 150.54985300405392], 
"eval_len": [53, 65, 27, 28, 28, 30, 60, 25, 80, 29]}

 70%|██████▉   | 699999/1000000 [11:48:10<3:49:06, 21.82it/s]global step 700000, trans_decision ep_re 163.42473711469393

{"global_step": 700000, "eval_re": [125.2121581246096, 113.27142007553273, 
108.70361722302118, 107.30071564062496, 399.2263204800891, 118.94204074916715, 
142.06565687099507, 268.8337481519791, 131.68405083870982, 119.00764299221052], 
"eval_len": [24, 22, 21, 21, 76, 23, 27, 50, 26, 23]}

 71%|███████   | 709999/1000000 [11:58:20<3:37:03, 22.27it/s]global step 710000, trans_decision ep_re 176.60110126065263

{"global_step": 710000, "eval_re": [138.23394523159251, 84.60890844540941, 
162.65091186633194, 179.79111512983033, 141.06540763988446, 559.1563928229623, 
168.66054625473197, 133.44160902736624, 108.84701256586528, 89.55516362255175], 
"eval_len": [27, 17, 32, 34, 27, 105, 32, 26, 21, 18]}

 72%|███████▏  | 719999/1000000 [12:08:30<3:35:15, 21.68it/s]global step 720000, trans_decision ep_re 160.1400775146694

{"global_step": 720000, "eval_re": [129.05056590990137, 119.9339239226946, 
118.6168816383553, 119.5263513284811, 113.739618673153, 146.35267218384612, 
118.19999221155321, 106.86209530938645, 378.99534825085885, 250.123325718464], 
"eval_len": [25, 23, 23, 23, 22, 28, 23, 21, 69, 48]}

 73%|███████▎  | 729999/1000000 [12:18:31<3:24:51, 21.97it/s]global step 730000, trans_decision ep_re 269.1790102594133

{"global_step": 730000, "eval_re": [629.2657753964571, 318.72420239199243, 
148.3367053472905, 151.018894223182, 135.3057196045832, 97.43082488547448, 
183.03643356381167, 182.42080126973556, 125.24908966827331, 721.0016562433328], 
"eval_len": [122, 60, 29, 29, 26, 19, 35, 36, 24, 141]}

 74%|███████▍  | 739997/1000000 [12:29:00<3:17:33, 21.94it/s]global step 740000, trans_decision ep_re 242.4495712920297

{"global_step": 740000, "eval_re": [362.2953959687017, 125.2603618772811, 
368.80267063079583, 298.36262042999647, 420.4470622194602, 113.51506144680579, 
129.9994635047743, 102.76258733825279, 343.13502052302533, 159.91546898120345], 
"eval_len": [67, 24, 69, 58, 78, 22, 25, 20, 66, 31]}

 75%|███████▍  | 749998/1000000 [12:39:10<3:04:33, 22.58it/s]global step 750000, trans_decision ep_re 237.87851841454417

{"global_step": 750000, "eval_re": [209.99966597984005, 145.25194787543694, 
96.39676438642235, 375.9036353704975, 134.3989577008365, 145.65860361732044, 
547.40421936799, 474.8452781238675, 96.05357330648481, 152.8725384167456], 
"eval_len": [40, 28, 19, 70, 26, 28, 99, 88, 19, 29]}

 76%|███████▌  | 759997/1000000 [12:49:20<3:02:03, 21.97it/s]global step 760000, trans_decision ep_re 192.4249932556046

{"global_step": 760000, "eval_re": [114.5286036593725, 90.55656791163842, 
193.87258073175272, 273.7203142964544, 96.68195216215044, 330.0980000494559, 
108.41149965939164, 124.9757801851078, 140.0139722292119, 451.3906616715101], 
"eval_len": [22, 18, 37, 54, 19, 64, 21, 24, 27, 80]}

 77%|███████▋  | 769999/1000000 [12:59:30<2:53:22, 22.11it/s]global step 770000, trans_decision ep_re 224.5352437059173

{"global_step": 770000, "eval_re": [125.0843560882747, 162.59410503629982, 
560.0120361667655, 472.59094874747035, 109.07956606501047, 141.93221184424084, 
155.4734532389012, 123.5809432669987, 96.93560029343514, 298.06921631177664], 
"eval_len": [24, 31, 104, 89, 21, 27, 30, 24, 19, 54]}

 78%|███████▊  | 779999/1000000 [13:09:40<2:44:33, 22.28it/s]global step 780000, trans_decision ep_re 200.61345338668934

{"global_step": 780000, "eval_re": [152.32728677405134, 193.6063531790899, 
96.77971075893305, 111.09898569407764, 342.83450038435427, 225.98001269517016, 
168.24083683014965, 134.84701599563985, 429.2598034493131, 151.16002810611462], 
"eval_len": [29, 37, 19, 22, 64, 43, 32, 26, 76, 29]}

 79%|███████▉  | 789999/1000000 [13:19:41<2:39:45, 21.91it/s]global step 790000, trans_decision ep_re 224.683845129069

{"global_step": 790000, "eval_re": [205.80525139725748, 449.97635271814795, 
124.33472885308554, 208.4916414844335, 226.39285980577057, 126.30989906519288, 
97.1034343030766, 108.2053883455671, 277.98671920057996, 422.2321761175785], 
"eval_len": [39, 85, 24, 40, 43, 24, 19, 21, 53, 79]}

 80%|███████▉  | 799998/1000000 [13:29:53<2:31:08, 22.05it/s]global step 800000, trans_decision ep_re 174.39042511792553

{"global_step": 800000, "eval_re": [130.11269266550377, 108.16696904523695, 
341.41165490096785, 155.2562274692157, 108.6430045909213, 378.6056289339364, 
130.29080514675275, 120.11745408555198, 168.85037564223532, 102.4494386989333], 
"eval_len": [25, 21, 64, 30, 21, 67, 25, 23, 32, 20]}

 81%|████████  | 809998/1000000 [13:40:20<2:19:51, 22.64it/s]global step 810000, trans_decision ep_re 204.9482061035721

{"global_step": 810000, "eval_re": [134.33775663853868, 329.44741912075926, 
157.16904555066054, 134.86849330459714, 96.76908889396599, 135.45251589045148, 
296.55759973734473, 133.2503946910048, 91.2393638984454, 540.3903833099529], 
"eval_len": [26, 62, 30, 26, 19, 26, 58, 26, 18, 104]}

 82%|████████▏ | 819999/1000000 [13:50:30<2:14:49, 22.25it/s]global step 820000, trans_decision ep_re 256.49984890386156

{"global_step": 820000, "eval_re": [213.92544117985176, 516.946253585252, 
190.82703270696422, 276.82842624855726, 96.51678776871161, 334.29004047985836, 
206.7441116910134, 135.36815038552368, 472.990574894698, 120.5616700981859], 
"eval_len": [41, 94, 37, 53, 19, 64, 39, 26, 89, 23]}

 83%|████████▎ | 829999/1000000 [14:00:40<2:08:26, 22.06it/s]global step 830000, trans_decision ep_re 165.9708657072029

{"global_step": 830000, "eval_re": [139.59859694648918, 142.61977989663816, 
156.89657329006113, 416.8818470328269, 221.2044046196334, 96.87715552565942, 
107.287100435634, 162.3114434695856, 102.5420954443106, 113.48966041119063], 
"eval_len": [27, 27, 30, 79, 42, 19, 21, 31, 20, 22]}

 84%|████████▍ | 839999/1000000 [14:10:50<2:02:37, 21.75it/s]global step 840000, trans_decision ep_re 177.1820961941217

{"global_step": 840000, "eval_re": [113.43622365687958, 119.07209534141808, 
101.75157765699916, 119.34157262879489, 129.84434711602165, 147.05804420007516, 
233.6462856443824, 380.73622435501835, 308.8240995924874, 118.11049174914028], 
"eval_len": [22, 23, 20, 23, 25, 28, 44, 69, 59, 23]}

 85%|████████▍ | 849999/1000000 [14:21:00<1:52:56, 22.14it/s]global step 850000, trans_decision ep_re 191.3607135353886

{"global_step": 850000, "eval_re": [406.4693033900836, 127.47132162549066, 
156.68199439261608, 108.99423495750472, 180.91697193953965, 138.08481344983525, 
127.41727850169595, 120.30421132156363, 444.713236893023, 102.55376888253365], 
"eval_len": [72, 25, 30, 21, 34, 27, 25, 23, 87, 20]}

 86%|████████▌ | 859998/1000000 [14:31:02<1:45:26, 22.13it/s]global step 860000, trans_decision ep_re 267.3868339230636

{"global_step": 860000, "eval_re": [388.8002422375664, 280.69446750174455, 
161.20799686878846, 123.89067157613465, 160.75386390099652, 396.7491106153881, 
325.3368662996667, 308.4175724397009, 107.87201509164757, 420.145532699002], 
"eval_len": [72, 53, 31, 24, 31, 75, 61, 61, 21, 70]}

 87%|████████▋ | 869998/1000000 [14:41:31<1:36:28, 22.46it/s]global step 870000, trans_decision ep_re 179.66941548779974

{"global_step": 870000, "eval_re": [124.32427209942684, 102.27035034602854, 
131.18633066385027, 411.7722181756909, 141.15771881452085, 388.3119841325154, 
114.2093885856314, 155.5048472951311, 118.69763295845803, 109.25941180674415], 
"eval_len": [24, 20, 26, 77, 27, 72, 22, 30, 23, 21]}

 88%|████████▊ | 879999/1000000 [14:51:41<1:31:04, 21.96it/s]global step 880000, trans_decision ep_re 152.3936580979068

{"global_step": 880000, "eval_re": [130.2814806039792, 101.80485313542778, 
209.9990574928524, 119.20401221951163, 108.61179181515229, 246.12440504880323, 
151.43869273692493, 171.89031853448614, 158.12309433894686, 126.4588750529836], 
"eval_len": [25, 20, 41, 23, 21, 47, 30, 33, 30, 24]}

 89%|████████▉ | 889999/1000000 [15:01:51<1:22:13, 22.30it/s]global step 890000, trans_decision ep_re 171.3928067750435

{"global_step": 890000, "eval_re": [133.1743332418889, 108.67167092225695, 
113.50837178611555, 403.71794951464994, 102.62637760967675, 204.20575809053693, 
214.405594234262, 129.45241573256507, 188.92117409667563, 115.24442252180735], 
"eval_len": [26, 21, 22, 73, 20, 39, 40, 25, 36, 22]}

 90%|████████▉ | 899999/1000000 [15:12:01<1:15:35, 22.05it/s]global step 900000, trans_decision ep_re 223.76904360906633

{"global_step": 900000, "eval_re": [141.78248875374672, 485.2112537906724, 
179.18994350151138, 150.05268685079906, 507.06436847723074, 178.91376718230018, 
124.05714240166803, 118.1290226229513, 158.45475422710612, 194.83500828267725], 
"eval_len": [27, 92, 34, 29, 92, 36, 24, 23, 30, 37]}

 91%|█████████ | 909999/1000000 [15:22:11<1:08:17, 21.96it/s]global step 910000, trans_decision ep_re 136.94439215522283

{"global_step": 910000, "eval_re": [196.25909515822886, 140.00580755187687, 
102.27778519211803, 160.22919945317477, 139.65321015722756, 119.12755994500121, 
130.28883360214516, 149.65186370482328, 103.59320212125613, 128.35736466637647],
"eval_len": [37, 27, 20, 31, 27, 23, 25, 29, 20, 25]}

 92%|█████████▏| 919999/1000000 [15:32:11<1:01:51, 21.55it/s]global step 920000, trans_decision ep_re 178.00552752123605

{"global_step": 920000, "eval_re": [153.99464920190854, 179.92478342149514, 
119.55911664857976, 129.09355588928491, 108.16537656635253, 103.1990750958612, 
123.30349135737123, 156.17055423591663, 416.6190248138021, 290.0256479817884], 
"eval_len": [29, 34, 23, 25, 21, 20, 24, 30, 80, 57]}

 93%|█████████▎| 929997/1000000 [15:42:23<54:21, 21.46it/s]global step 930000, trans_decision ep_re 163.46694291182087

{"global_step": 930000, "eval_re": [114.74311865908237, 133.6842274439543, 
200.34573143796905, 126.3402296134104, 168.6517895626646, 128.46122392698877, 
385.6821819719681, 134.9018791522491, 112.91247173889917, 128.94657561102287], 
"eval_len": [22, 26, 38, 24, 33, 25, 71, 26, 22, 25]}

 94%|█████████▍| 939998/1000000 [15:52:51<44:46, 22.33it/s]global step 940000, trans_decision ep_re 161.92894473593412

{"global_step": 940000, "eval_re": [108.06498715176153, 102.59811245275452, 
211.9343785902317, 267.39623009062126, 103.62886840893782, 183.0159288777456, 
133.87258094137698, 297.71022389071453, 102.42580171364621, 108.64233524155118],
"eval_len": [21, 20, 40, 51, 20, 35, 26, 58, 20, 21]}

 95%|█████████▍| 949999/1000000 [16:03:01<38:13, 21.80it/s]global step 950000, trans_decision ep_re 283.9034299182981

{"global_step": 950000, "eval_re": [151.20792779724334, 151.6891020247712, 
339.1601920057637, 376.589959416553, 260.9498356236618, 434.96638206826873, 
151.01443343128523, 119.07048019495528, 359.536017654533, 494.84996896594544], 
"eval_len": [29, 29, 64, 73, 49, 79, 29, 23, 67, 94]}

 96%|█████████▌| 959998/1000000 [16:13:21<30:23, 21.94it/s]global step 960000, trans_decision ep_re 285.20615846247637

{"global_step": 960000, "eval_re": [541.6290684586669, 150.20914609322622, 
141.70607415012898, 130.18679883351982, 417.68076457336724, 106.73038718158412, 
134.73946567454226, 528.2274099197753, 150.9171799300094, 550.0352898099436], 
"eval_len": [96, 29, 27, 25, 75, 21, 26, 114, 29, 96]}

 97%|█████████▋| 969999/1000000 [16:23:31<23:08, 21.61it/s]global step 970000, trans_decision ep_re 167.7371986267493

{"global_step": 970000, "eval_re": [277.1077647598619, 123.9031232472844, 
466.4206874894535, 131.01191152919756, 117.91172798546154, 102.59944170935596, 
118.95972731923798, 128.87634293630978, 114.08984270317319, 96.49141658815721], 
"eval_len": [54, 24, 79, 25, 23, 20, 23, 25, 22, 19]}

 98%|█████████▊| 979999/1000000 [16:33:51<14:55, 22.34it/s]global step 980000, trans_decision ep_re 157.1719292971024

{"global_step": 980000, "eval_re": [311.9487877245889, 90.79912863100962, 
114.02857808343865, 147.92485582640992, 195.94096568054, 102.60684819263739, 
109.39580913597557, 162.51795143199146, 192.56902555634755, 143.98734270808484],
"eval_len": [57, 18, 22, 28, 37, 20, 21, 31, 36, 28]}

 99%|█████████▉| 989999/1000000 [16:43:51<07:34, 22.00it/s]global step 990000, trans_decision ep_re 129.6759661422296

{"global_step": 990000, "eval_re": [109.37520687797145, 124.94143964915573, 
109.56120228544447, 235.02691402389223, 109.05799065598885, 95.4602664628131, 
134.08931734519967, 101.67467326110577, 123.97142015446147, 153.60123070626352],
"eval_len": [21, 24, 21, 45, 21, 19, 27, 20, 24, 29]}

100%|█████████▉| 999997/1000000 [16:54:21<00:00, 21.51it/s]global step 1000000, trans_decision ep_re 234.34036630848027

{"global_step": 1000000, "eval_re": [139.6319497388501, 589.8229203849038, 
103.3340258348861, 146.69466880858755, 647.9577403841706, 135.88537876760415, 
244.55240255056628, 90.77789358874374, 120.66061067028558, 124.08607235620461], 
"eval_len": [27, 109, 20, 29, 113, 26, 46, 18, 23, 24]}

100%|██████████| 1000000/1000000 [16:54:21<00:00, 16.43it/s]
