
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.15
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9998/1000000 [06:30<15:01:50, 18.30it/s]global step 10000, trans_decision ep_re 196.19253238064238

{"global_step": 10000, "eval_re": [172.4881415088756, 170.912157731151, 
361.28801210735037, 200.93983024843882, 108.08006718488348, 320.21265307752446, 
97.01907142520352, 96.80885867342863, 313.60389273235427, 120.57263911721346], 
"eval_len": [33, 33, 69, 38, 21, 61, 19, 19, 60, 23]}

  2%|▏         | 19998/1000000 [19:00<14:41:20, 18.53it/s]global step 20000, trans_decision ep_re 157.56562194274736

{"global_step": 20000, "eval_re": [107.63376373063096, 118.93316122244566, 
175.09999945846633, 267.3770185332749, 202.50545290608267, 136.0502997553673, 
114.05362178279591, 215.29711353259168, 120.08801001490343, 118.61777849091483],
"eval_len": [21, 23, 33, 50, 41, 26, 22, 41, 23, 23]}

  3%|▎         | 29998/1000000 [31:30<14:42:02, 18.33it/s]global step 30000, trans_decision ep_re 177.82768235802325

{"global_step": 30000, "eval_re": [287.342514540967, 130.1496431286882, 
125.3015128728722, 111.793013853443, 161.55701811238183, 101.87431051579442, 
105.9126107960223, 267.67256624502204, 373.2162964466855, 113.45733706835608], 
"eval_len": [55, 25, 24, 22, 31, 20, 21, 54, 72, 22]}

  4%|▍         | 39998/1000000 [44:00<14:34:04, 18.31it/s]global step 40000, trans_decision ep_re 189.7774876516399

{"global_step": 40000, "eval_re": [157.24393298988522, 321.24437176805776, 
167.7208046807604, 167.29630301236787, 153.26440564418306, 151.73527451107324, 
184.80184424936223, 379.0487586563411, 89.56960533209356, 125.84957567227444], 
"eval_len": [30, 63, 32, 32, 30, 29, 36, 71, 18, 24]}

  5%|▍         | 49998/1000000 [56:30<14:26:34, 18.27it/s]global step 50000, trans_decision ep_re 151.2689949555642

{"global_step": 50000, "eval_re": [160.98618786369389, 107.45477360592915, 
149.8549542857188, 141.5253872112996, 146.26853194431993, 151.9862764717853, 
221.35381702844762, 114.16877800281696, 157.86039362656996, 161.23084951506092],
"eval_len": [32, 21, 29, 27, 28, 29, 42, 22, 30, 31]}

  6%|▌         | 59998/1000000 [1:09:00<14:22:34, 18.16it/s]global step 60000, trans_decision ep_re 209.40888399102852

{"global_step": 60000, "eval_re": [89.74937620125549, 191.18176866622045, 
181.81699193051082, 155.22487841523636, 140.31192562953865, 130.00969568788832, 
180.45022169508968, 506.79171803059586, 114.09434601259298, 404.4579176413565], 
"eval_len": [18, 36, 36, 30, 27, 25, 35, 96, 22, 75]}

  7%|▋         | 69998/1000000 [1:21:20<14:04:19, 18.36it/s]global step 70000, trans_decision ep_re 165.74391763375226

{"global_step": 70000, "eval_re": [124.6173161927789, 124.8387216322872, 
125.1033933763002, 103.04243595081577, 181.75281439113135, 129.54264290152182, 
121.46119965530725, 144.4569205958105, 410.0004569676553, 192.62327467391444], 
"eval_len": [24, 24, 24, 20, 35, 25, 24, 28, 81, 37]}

  8%|▊         | 79998/1000000 [1:33:50<13:51:16, 18.45it/s]global step 80000, trans_decision ep_re 128.8566684857012

{"global_step": 80000, "eval_re": [125.8290597800462, 129.45212623233078, 
119.27302032416507, 154.72008457530066, 184.73828455074457, 115.17668528182443, 
102.50558500060137, 145.5038112440976, 96.85170927805899, 114.51631858984246], 
"eval_len": [24, 25, 23, 30, 36, 22, 20, 28, 19, 22]}

  9%|▉         | 89998/1000000 [1:46:10<13:42:10, 18.45it/s]global step 90000, trans_decision ep_re 233.6209774701603

{"global_step": 90000, "eval_re": [216.05973177585565, 166.71822667730055, 
131.4237033193429, 581.6938041085805, 166.55686007767432, 341.7016553195704, 
107.95209536875666, 373.496839931312, 141.5119531244513, 109.09490499875899], 
"eval_len": [41, 32, 25, 110, 32, 64, 21, 69, 27, 21]}

 10%|▉         | 99998/1000000 [1:58:40<13:41:14, 18.27it/s]global step 100000, trans_decision ep_re 169.99589644335103

{"global_step": 100000, "eval_re": [96.43069918870498, 191.1521036021216, 
119.91982156928185, 134.52092196769252, 143.81438035938902, 217.67282845671, 
146.43095479016787, 386.8357756643796, 154.32881888424546, 108.8526599508174], 
"eval_len": [19, 37, 23, 26, 28, 42, 28, 74, 30, 21]}

 11%|█         | 109998/1000000 [2:11:10<13:28:17, 18.35it/s]global step 110000, trans_decision ep_re 147.76132849407927

{"global_step": 110000, "eval_re": [144.5714035505672, 166.55751491953836, 
101.1854041002694, 252.8833675597443, 146.18901273598618, 168.28304965078723, 
162.2585001150323, 131.58642643540162, 113.52354872930812, 90.5750571441582], 
"eval_len": [28, 32, 20, 49, 28, 33, 31, 25, 22, 18]}

 12%|█▏        | 119998/1000000 [2:23:40<13:17:35, 18.39it/s]global step 120000, trans_decision ep_re 131.69086864370914

{"global_step": 120000, "eval_re": [90.72087204302798, 112.97706287934838, 
179.0737547974178, 117.90083484559982, 147.17810881249576, 103.06355644614, 
154.79696027861354, 145.92061351368758, 124.75859469940286, 140.51832812135757],
"eval_len": [18, 22, 34, 23, 28, 20, 30, 28, 24, 27]}

 13%|█▎        | 129998/1000000 [2:36:10<13:04:41, 18.48it/s]global step 130000, trans_decision ep_re 187.67090364715256

{"global_step": 130000, "eval_re": [164.92075075437424, 175.84159693939765, 
136.01365119818863, 158.40155487160524, 302.05364043028595, 163.85080889408653, 
153.38854991263193, 171.01883240988772, 108.7531539156145, 342.4664971454531], 
"eval_len": [32, 34, 26, 30, 57, 32, 30, 33, 21, 65]}

 14%|█▍        | 139998/1000000 [2:48:30<12:54:51, 18.50it/s]global step 140000, trans_decision ep_re 125.45024867111647

{"global_step": 140000, "eval_re": [130.11108796468994, 143.9411094983974, 
109.0237479978904, 148.26122058316264, 108.40796393733069, 119.50485746063923, 
97.2497770444141, 108.04094715295074, 186.61431083495458, 103.34746423673502], 
"eval_len": [25, 28, 21, 29, 21, 23, 19, 21, 36, 20]}

 15%|█▍        | 149998/1000000 [3:01:00<12:44:20, 18.53it/s]global step 150000, trans_decision ep_re 141.56588431998802

{"global_step": 150000, "eval_re": [107.46749527263103, 102.86676247735672, 
102.48547995746532, 172.53206960502817, 140.97249923145102, 201.0442818259672, 
166.54784984780008, 123.15407471266501, 180.26841926417723, 118.31991100533844],
"eval_len": [21, 20, 20, 33, 27, 39, 32, 24, 36, 23]}

 16%|█▌        | 159998/1000000 [3:13:20<12:36:03, 18.52it/s]global step 160000, trans_decision ep_re 180.61033701701686

{"global_step": 160000, "eval_re": [102.94233479102584, 156.57325766490936, 
96.85280062579744, 168.55291333145888, 135.45591271191773, 319.5544532093525, 
124.3327487926877, 388.84622956823455, 156.55329785429097, 156.43942162049353], 
"eval_len": [20, 30, 19, 32, 26, 64, 24, 71, 30, 30]}

 17%|█▋        | 169998/1000000 [3:25:50<12:28:45, 18.48it/s]global step 170000, trans_decision ep_re 136.78457366783505

{"global_step": 170000, "eval_re": [157.2888783229573, 113.91876017710995, 
325.662750599768, 107.69867884239963, 96.23186252614427, 97.00700356503616, 
103.13180919017019, 125.48387822788573, 109.21071647524758, 132.21139875163166],
"eval_len": [30, 22, 61, 21, 19, 19, 20, 24, 21, 26]}

 18%|█▊        | 179998/1000000 [3:38:10<12:18:19, 18.51it/s]global step 180000, trans_decision ep_re 164.0049565361331

{"global_step": 180000, "eval_re": [114.48266104973261, 383.16926430902464, 
142.2819322585508, 117.76910618451203, 140.63192049673677, 103.14728518311358, 
125.22004844606543, 102.30651864384497, 184.74583820325597, 226.2949905864943], 
"eval_len": [22, 72, 27, 23, 27, 20, 24, 20, 36, 43]}

 19%|█▉        | 189998/1000000 [3:50:30<12:07:53, 18.55it/s]global step 190000, trans_decision ep_re 144.64201746724206

{"global_step": 190000, "eval_re": [128.85540642710455, 175.8764751846468, 
108.84412304693552, 147.7040463751763, 159.3321137366174, 96.6505513181044, 
177.37720284357727, 160.63442777726885, 159.76553180106225, 131.38029616192748],
"eval_len": [25, 33, 21, 28, 30, 19, 34, 31, 33, 25]}

 20%|█▉        | 199998/1000000 [4:03:00<12:02:52, 18.44it/s]global step 200000, trans_decision ep_re 201.5818464270776

{"global_step": 200000, "eval_re": [119.80756261302696, 127.39515580063365, 
131.28078658910516, 146.44186300710027, 166.50850841347918, 173.95368306583268, 
413.40891757587224, 102.55366521916002, 424.6729685027094, 209.79535348385673], 
"eval_len": [23, 25, 25, 28, 34, 33, 78, 20, 86, 40]}

 21%|██        | 209998/1000000 [4:15:20<11:52:24, 18.48it/s]global step 210000, trans_decision ep_re 136.14963075220697

{"global_step": 210000, "eval_re": [119.14125867446404, 95.68325392889099, 
154.22589531481842, 167.92434653278642, 139.7229215340389, 112.01096397796256, 
131.60164991796086, 165.08801890105835, 129.00207246525585, 147.0959262748335], 
"eval_len": [23, 19, 30, 32, 27, 22, 25, 32, 25, 28]}

 22%|██▏       | 219998/1000000 [4:27:50<11:46:28, 18.40it/s]global step 220000, trans_decision ep_re 130.97523778836617

{"global_step": 220000, "eval_re": [156.6526894734393, 89.6229397681302, 
135.17404308712844, 157.5153913195881, 154.49657594112082, 146.00757458880972, 
155.3739675879423, 95.98142771765781, 108.25285684058558, 110.67491155925936], 
"eval_len": [30, 18, 26, 30, 30, 28, 30, 19, 21, 22]}

 23%|██▎       | 229998/1000000 [4:40:10<11:32:28, 18.53it/s]global step 230000, trans_decision ep_re 168.12057510950643

{"global_step": 230000, "eval_re": [133.9777234922492, 156.7668165021865, 
170.0593127373801, 283.0478681212319, 184.22588101722758, 149.91136391076773, 
185.12908360656357, 164.0769252078272, 128.90691340710384, 125.10386309252681], 
"eval_len": [26, 30, 33, 53, 36, 29, 35, 32, 25, 24]}

 24%|██▍       | 239998/1000000 [4:52:40<11:23:53, 18.52it/s]global step 240000, trans_decision ep_re 112.69412560203766

{"global_step": 240000, "eval_re": [113.26625956818269, 107.28999573777821, 
113.5141446965305, 106.81560843164364, 126.17641518493923, 90.69471455502881, 
89.86925342138922, 111.46150051170547, 130.9104726136726, 136.9428912995061], 
"eval_len": [22, 21, 22, 21, 24, 18, 18, 22, 25, 26]}

 25%|██▍       | 249998/1000000 [5:05:00<11:16:12, 18.49it/s]global step 250000, trans_decision ep_re 169.65023393368128

{"global_step": 250000, "eval_re": [101.42301026534422, 154.4836534651515, 
182.32077223209177, 112.39780489482757, 406.09652401188254, 118.44097705338002, 
148.3311751532104, 135.73637880653254, 187.58897306618317, 149.68307038820927], 
"eval_len": [20, 30, 35, 22, 78, 23, 28, 26, 36, 29]}

 26%|██▌       | 259998/1000000 [5:17:30<11:08:12, 18.46it/s]global step 260000, trans_decision ep_re 156.54194045075786

{"global_step": 260000, "eval_re": [137.76408600738793, 362.99657631538497, 
103.16615300012717, 136.19530195748115, 114.10269312577127, 146.73579400758175, 
108.39588630624796, 141.20672881150324, 157.80520555788837, 157.05097941820455],
"eval_len": [26, 70, 20, 26, 22, 28, 21, 28, 30, 30]}

 27%|██▋       | 269998/1000000 [5:29:50<10:58:41, 18.47it/s]global step 270000, trans_decision ep_re 157.8227698112329

{"global_step": 270000, "eval_re": [130.96952708894497, 143.76268966650582, 
378.54631519350494, 170.28714472357726, 123.70359679008209, 145.9629131734377, 
136.61663458591323, 103.16853859696813, 127.8818500906558, 117.32848820273908], 
"eval_len": [25, 28, 68, 32, 24, 28, 26, 20, 25, 23]}

 28%|██▊       | 279998/1000000 [5:42:20<10:49:52, 18.47it/s]global step 280000, trans_decision ep_re 138.32895263799358

{"global_step": 280000, "eval_re": [102.63416413680723, 123.27598319479783, 
150.73513929234943, 127.28703121038856, 152.29105649554057, 119.20429003435132, 
161.274911289045, 169.48886907816484, 141.35461036504336, 135.74347128344766], 
"eval_len": [20, 24, 29, 25, 29, 23, 31, 32, 27, 26]}

 29%|██▉       | 289998/1000000 [5:54:40<10:38:14, 18.54it/s]global step 290000, trans_decision ep_re 197.0752592108894

{"global_step": 290000, "eval_re": [181.80965599520476, 148.56723388214692, 
168.21622232970932, 124.34267237256547, 430.15288402371385, 150.49395879073688, 
148.83428181128548, 378.63199278743446, 119.73221380788318, 119.9714763082138], 
"eval_len": [35, 29, 33, 24, 79, 29, 29, 73, 23, 23]}

 30%|██▉       | 299998/1000000 [6:07:10<10:31:23, 18.48it/s]global step 300000, trans_decision ep_re 136.9731613711253

{"global_step": 300000, "eval_re": [102.58431023742696, 119.08374059532242, 
102.39445005278883, 113.89224411840563, 174.52339938378293, 216.9632944461818, 
157.82303724463156, 96.02916739606823, 119.801996889229, 166.6359733474153], 
"eval_len": [20, 23, 20, 22, 33, 41, 30, 19, 23, 33]}

 31%|███       | 309998/1000000 [6:19:30<10:19:36, 18.56it/s]global step 310000, trans_decision ep_re 131.22620490336143

{"global_step": 310000, "eval_re": [158.88696571757492, 144.8133568631082, 
118.50008883649821, 124.8770576861549, 108.24884363412872, 107.58227727366771, 
102.75750310211318, 112.30966153394779, 209.84762550086103, 124.4386688855597], 
"eval_len": [30, 28, 23, 24, 21, 21, 20, 22, 41, 24]}

 32%|███▏      | 319998/1000000 [6:32:00<10:16:50, 18.37it/s]global step 320000, trans_decision ep_re 141.18485131600568

{"global_step": 320000, "eval_re": [158.42957655144173, 172.55795933047054, 
161.86778707968804, 160.4900805641414, 159.96327219628827, 162.06824485171205, 
112.55495349007775, 107.6125710342925, 102.76094314443392, 113.54312491751062], 
"eval_len": [31, 33, 31, 31, 31, 31, 22, 21, 20, 22]}

 33%|███▎      | 329998/1000000 [6:44:20<10:06:29, 18.41it/s]global step 330000, trans_decision ep_re 139.56246285606022

{"global_step": 330000, "eval_re": [138.13213295377855, 146.92634944222715, 
113.28612896301857, 96.1762939115701, 156.13462517160843, 103.01022924101159, 
125.97089990082937, 124.78258646542089, 271.01959331524336, 120.18578919589392],
"eval_len": [27, 28, 22, 19, 30, 20, 24, 24, 52, 23]}

 34%|███▍      | 339998/1000000 [6:56:50<9:54:19, 18.51it/s]global step 340000, trans_decision ep_re 175.0807632876892

{"global_step": 340000, "eval_re": [366.6334886051395, 89.63936271152056, 
123.45026193914806, 344.78895985830127, 171.27468456518892, 108.5564451347847, 
136.67722943096408, 167.0019069210063, 102.22345501545188, 140.56183869538677], 
"eval_len": [71, 18, 24, 65, 33, 21, 26, 32, 20, 27]}

 35%|███▍      | 349998/1000000 [7:09:10<9:46:18, 18.48it/s]global step 350000, trans_decision ep_re 148.5639647399733

{"global_step": 350000, "eval_re": [176.9110682238029, 151.74351422495113, 
119.55465308303266, 147.59088993253474, 193.7478921368278, 117.79560738631896, 
127.45863848591021, 152.36502726896305, 127.88940422466696, 170.58295243272454],
"eval_len": [35, 29, 23, 28, 37, 23, 25, 29, 25, 33]}

 36%|███▌      | 359998/1000000 [7:21:40<9:36:46, 18.49it/s]global step 360000, trans_decision ep_re 137.16182111095216

{"global_step": 360000, "eval_re": [114.64994070161865, 168.4527060492254, 
122.20202467057013, 113.0492655476732, 168.27574207036707, 105.61871848031518, 
195.06901820917687, 168.94590079313477, 95.87098363389556, 119.48391095354471], 
"eval_len": [22, 32, 24, 22, 32, 21, 38, 32, 19, 23]}

 37%|███▋      | 369998/1000000 [7:34:00<9:27:14, 18.51it/s]global step 370000, trans_decision ep_re 174.18929244109262

{"global_step": 370000, "eval_re": [156.9534022134583, 137.93900195938897, 
129.60115746623134, 347.53046105507576, 141.69976318026465, 108.86557325532539, 
100.79515029553208, 343.5056489518157, 102.62194547552545, 172.38082055830847], 
"eval_len": [30, 27, 25, 66, 27, 21, 20, 65, 20, 33]}

 38%|███▊      | 379998/1000000 [7:46:30<9:19:14, 18.48it/s]global step 380000, trans_decision ep_re 145.30649678971199

{"global_step": 380000, "eval_re": [126.11497495087376, 146.11024621027403, 
102.97933901706884, 119.3569374602454, 207.27263406608176, 141.26211056565242, 
173.66473734208904, 140.800033249521, 154.7500290759427, 140.75392595937114], 
"eval_len": [24, 28, 20, 23, 41, 27, 34, 27, 30, 27]}

 39%|███▉      | 389998/1000000 [7:58:50<9:09:58, 18.49it/s]global step 390000, trans_decision ep_re 126.97493706417507

{"global_step": 390000, "eval_re": [109.29921107227547, 139.97202495518874, 
172.43506914170786, 115.06026241646427, 114.17102752592717, 125.63279259382831, 
108.16484268621035, 156.4551177340675, 126.31850437311336, 102.24051814296769], 
"eval_len": [21, 27, 33, 22, 22, 24, 21, 30, 24, 20]}

 40%|███▉      | 399998/1000000 [8:11:20<8:59:24, 18.54it/s]global step 400000, trans_decision ep_re 159.46119066260025

{"global_step": 400000, "eval_re": [390.86805871001553, 152.88817755911836, 
154.57322067984387, 108.11891532450622, 108.31918830492215, 96.63257728366696, 
156.39752623926844, 157.62802867371457, 114.62409259840712, 154.5621212525392], 
"eval_len": [71, 30, 30, 21, 21, 19, 31, 30, 22, 30]}

 41%|████      | 409998/1000000 [8:23:40<8:51:25, 18.50it/s]global step 410000, trans_decision ep_re 166.67530474745536

{"global_step": 410000, "eval_re": [367.6674529586765, 125.84210871392968, 
158.8628525330755, 150.6738066114168, 172.30437660904238, 168.05430149218319, 
161.64750986824671, 118.58812654935177, 129.12361156796482, 113.9889005706662], 
"eval_len": [69, 25, 31, 29, 33, 33, 31, 23, 25, 22]}

 42%|████▏     | 419998/1000000 [8:36:10<8:45:03, 18.41it/s]global step 420000, trans_decision ep_re 144.2742090164955

{"global_step": 420000, "eval_re": [193.26521245782885, 108.76093150092701, 
138.7272028038108, 129.7949247616521, 232.74021583402313, 117.56113235520355, 
131.57362867349042, 113.51875225236681, 120.32604452196531, 156.47404500368705],
"eval_len": [40, 21, 27, 25, 47, 23, 25, 22, 23, 30]}

 43%|████▎     | 429998/1000000 [8:48:30<8:37:13, 18.37it/s]global step 430000, trans_decision ep_re 128.70597779876738

{"global_step": 430000, "eval_re": [136.9518155535755, 129.7548844752812, 
145.98372856659142, 114.37317690089586, 96.0559300305695, 109.3515832165812, 
117.82681474500971, 185.56718953301237, 148.11171694559826, 103.08293802055854],
"eval_len": [26, 25, 28, 22, 19, 21, 23, 37, 29, 20]}

 44%|████▍     | 439998/1000000 [9:01:00<8:24:26, 18.50it/s]global step 440000, trans_decision ep_re 133.70455121466765

{"global_step": 440000, "eval_re": [166.9353977790588, 183.82511540371377, 
107.42363725418718, 117.27629230320996, 103.751720165738, 143.2722685029277, 
169.6965957699171, 108.72299559648158, 122.86263268539426, 113.27885668604827], 
"eval_len": [32, 36, 21, 23, 20, 28, 33, 21, 24, 22]}

 45%|████▍     | 449998/1000000 [9:13:30<8:17:15, 18.43it/s]global step 450000, trans_decision ep_re 140.76997652429563

{"global_step": 450000, "eval_re": [133.06831761295967, 153.68684218005887, 
132.15710198400257, 173.8621926487713, 113.95409479350158, 119.19320123810124, 
172.6793418095089, 111.2448202563523, 138.331006439848, 159.52284627985193], 
"eval_len": [26, 30, 26, 33, 22, 23, 34, 22, 27, 31]}

 46%|████▌     | 459998/1000000 [9:25:50<8:10:23, 18.35it/s]global step 460000, trans_decision ep_re 147.77007000649155

{"global_step": 460000, "eval_re": [125.50093760572135, 135.27022842067555, 
170.53357782663372, 139.4164948609536, 130.02074959996767, 119.74976190885776, 
145.47301555899182, 268.20294924458136, 136.3234395847708, 107.20954545376203], 
"eval_len": [24, 26, 32, 27, 25, 23, 28, 52, 26, 21]}

 47%|████▋     | 469998/1000000 [9:38:20<7:58:09, 18.47it/s]global step 470000, trans_decision ep_re 123.39364168583226

{"global_step": 470000, "eval_re": [165.12883614716503, 128.3854492984658, 
125.44018572661334, 114.28139742975154, 124.83036270387609, 107.84128002525247, 
97.190968751679, 166.35694433261384, 96.82424394013405, 107.65674850277183], 
"eval_len": [32, 25, 24, 22, 24, 21, 19, 31, 19, 21]}

 48%|████▊     | 479998/1000000 [9:50:40<7:47:18, 18.55it/s]global step 480000, trans_decision ep_re 132.3248834288611

{"global_step": 480000, "eval_re": [123.9316822575375, 172.23874665757273, 
119.04459375375, 114.78648568559683, 130.39958593898734, 108.75909208075092, 
164.62178371523945, 114.17692946051687, 149.66164251357264, 125.62829222508682],
"eval_len": [24, 33, 23, 22, 25, 21, 32, 22, 29, 24]}

 49%|████▉     | 489998/1000000 [10:03:10<7:41:40, 18.41it/s]global step 490000, trans_decision ep_re 135.9725817088045

{"global_step": 490000, "eval_re": [96.90677504851644, 134.75280497566516, 
96.494349803983, 190.92344021327673, 131.12786772585326, 155.98585247770643, 
130.61218328145043, 134.7972771509958, 139.77313339486028, 148.35213301573762], 
"eval_len": [19, 26, 19, 37, 25, 30, 25, 26, 27, 28]}

 50%|████▉     | 499998/1000000 [10:15:30<7:33:20, 18.38it/s]global step 500000, trans_decision ep_re 134.45027469634374

{"global_step": 500000, "eval_re": [108.44993914890526, 150.5369883703586, 
146.9347017354584, 183.53153781925587, 130.23376846437824, 109.39570174496116, 
155.8422449641905, 118.64944725846367, 139.4821157804548, 101.44630167701092], 
"eval_len": [21, 29, 28, 35, 25, 21, 30, 23, 27, 20]}

 51%|█████     | 509998/1000000 [10:28:00<7:22:33, 18.45it/s]global step 510000, trans_decision ep_re 149.76196658942231

{"global_step": 510000, "eval_re": [150.71703326843283, 144.97091473155692, 
229.3076515219624, 102.27174045544915, 154.17158630272138, 113.70916235702063, 
197.85886383992835, 101.74364432854208, 169.11965633997644, 133.74941274863275],
"eval_len": [30, 29, 44, 20, 30, 22, 38, 20, 32, 26]}

 52%|█████▏    | 519998/1000000 [10:40:30<7:15:15, 18.38it/s]global step 520000, trans_decision ep_re 146.01879639249404

{"global_step": 520000, "eval_re": [112.64322041651715, 130.24648204851937, 
153.67036073507822, 120.38953913195884, 203.98913833980697, 203.8123482908438, 
123.7437444212468, 167.83112939284254, 120.44726816065389, 123.41473298747238], 
"eval_len": [22, 25, 29, 23, 40, 41, 24, 33, 23, 24]}

 53%|█████▎    | 529998/1000000 [10:52:50<7:06:28, 18.37it/s]global step 530000, trans_decision ep_re 139.59132424992364

{"global_step": 530000, "eval_re": [136.75895139218846, 103.13768694836881, 
109.68074949657979, 97.01673455088958, 152.73262483081245, 96.09421610865134, 
119.85344744267346, 150.0047172725844, 183.57195501349636, 247.0621594429917], 
"eval_len": [26, 20, 21, 19, 29, 19, 23, 29, 35, 46]}

 54%|█████▍    | 539998/1000000 [11:05:20<6:54:57, 18.48it/s]global step 540000, trans_decision ep_re 152.40930582346175

{"global_step": 540000, "eval_re": [146.19874128378189, 188.68125416599213, 
125.34467408288997, 165.09223139988794, 172.326536802368, 148.32564687968951, 
133.870282124193, 131.06738362871445, 167.96432135639674, 145.2219865107038], 
"eval_len": [28, 36, 24, 31, 33, 29, 26, 25, 32, 28]}

 55%|█████▍    | 549998/1000000 [11:17:40<6:49:04, 18.33it/s]global step 550000, trans_decision ep_re 146.26800930105108

{"global_step": 550000, "eval_re": [120.57190043538868, 240.61417381222807, 
161.9331155920489, 191.39292483807185, 124.22475181571353, 101.78250872060691, 
148.03291708717407, 128.67440784344976, 135.86785281740632, 109.58554004842253],
"eval_len": [23, 46, 31, 37, 24, 20, 29, 25, 26, 21]}

 56%|█████▌    | 559998/1000000 [11:30:10<6:35:52, 18.52it/s]global step 560000, trans_decision ep_re 205.23971150653247

{"global_step": 560000, "eval_re": [140.86340228313978, 144.0890843900982, 
460.56149367515087, 347.81846192158736, 142.7739986317245, 203.66653080690713, 
125.13554784079848, 161.32915662678107, 169.00101838002496, 157.15842050911252],
"eval_len": [27, 28, 86, 65, 27, 39, 24, 32, 32, 31]}

 57%|█████▋    | 569998/1000000 [11:42:40<6:30:07, 18.37it/s]global step 570000, trans_decision ep_re 140.28457563936473

{"global_step": 570000, "eval_re": [122.97907766658152, 141.62721829805062, 
157.46388007878988, 163.32217115591786, 119.35968979151941, 180.80547735599856, 
95.61109925633795, 115.02063185397397, 134.20827088457855, 172.44824005189884], 
"eval_len": [24, 27, 30, 32, 23, 35, 19, 22, 27, 33]}

 58%|█████▊    | 579998/1000000 [11:55:00<6:18:34, 18.49it/s]global step 580000, trans_decision ep_re 176.43304056929793

{"global_step": 580000, "eval_re": [130.1450596561692, 113.96597119363827, 
96.52050080295018, 269.19658430156477, 119.6432533846651, 183.3882249154807, 
159.1863601891356, 173.58426829363418, 131.31004924719247, 387.390133708549], 
"eval_len": [25, 22, 19, 51, 23, 35, 31, 34, 25, 72]}

 59%|█████▉    | 589998/1000000 [12:07:30<6:11:40, 18.39it/s]global step 590000, trans_decision ep_re 141.12730255830147

{"global_step": 590000, "eval_re": [101.32721659210749, 89.63262795595845, 
103.1165835414842, 147.8281132793044, 147.21466772748997, 264.39558801327155, 
147.96732935093593, 159.86517134067532, 125.21524739791232, 124.71048038387528],
"eval_len": [20, 18, 20, 28, 29, 50, 28, 31, 24, 24]}

 60%|█████▉    | 599998/1000000 [12:19:50<6:03:01, 18.36it/s]global step 600000, trans_decision ep_re 148.50018023105852

{"global_step": 600000, "eval_re": [90.66824822349139, 216.689734772752, 
108.63330760724406, 188.14170026022165, 102.52959584500965, 154.24793180047217, 
131.02587761599125, 201.60002140452113, 152.07283892504617, 139.39254585583572],
"eval_len": [18, 41, 21, 37, 20, 30, 25, 39, 29, 27]}

 61%|██████    | 609998/1000000 [12:32:20<5:50:06, 18.57it/s]global step 610000, trans_decision ep_re 141.38689730187954

{"global_step": 610000, "eval_re": [96.60021833400965, 118.6992260393194, 
135.78124893478642, 166.39717417867485, 143.03256086348296, 132.627074196041, 
90.71738718240223, 228.40937444594243, 143.318209123456, 158.28649972068078], 
"eval_len": [19, 23, 26, 32, 27, 26, 18, 44, 28, 31]}

 62%|██████▏   | 619998/1000000 [12:44:40<5:43:29, 18.44it/s]global step 620000, trans_decision ep_re 194.698663691284

{"global_step": 620000, "eval_re": [526.4863548721511, 158.9139220506147, 
149.87255442684645, 114.76783236150726, 219.47656321347577, 185.0486978027042, 
149.39493430100504, 129.94838820550208, 165.54982476284178, 147.52756491619158],
"eval_len": [117, 31, 29, 22, 43, 36, 31, 25, 33, 28]}

 63%|██████▎   | 629998/1000000 [12:57:10<5:35:03, 18.41it/s]global step 630000, trans_decision ep_re 180.76688998599278

{"global_step": 630000, "eval_re": [113.62442141735399, 327.69466121061714, 
135.19505952455665, 113.59539559129865, 102.77669606848102, 130.3850860728341, 
510.73260852571167, 152.72678618957457, 113.56055460606488, 107.37763065343499],
"eval_len": [22, 60, 26, 22, 20, 25, 91, 30, 22, 21]}

 64%|██████▍   | 639998/1000000 [13:09:30<5:24:19, 18.50it/s]global step 640000, trans_decision ep_re 195.33261918990846

{"global_step": 640000, "eval_re": [451.4173394471013, 415.94491260530856, 
127.89578461787413, 116.60631041751627, 155.1858113768048, 113.14201525216782, 
182.68503479599255, 114.47131599706552, 108.11646677483843, 167.86120061441505],
"eval_len": [88, 81, 25, 23, 30, 22, 36, 22, 21, 33]}

 65%|██████▍   | 649998/1000000 [13:22:00<5:17:34, 18.37it/s]global step 650000, trans_decision ep_re 140.60720502853601

{"global_step": 650000, "eval_re": [146.14580900443025, 166.12353031976028, 
166.94130627585872, 114.18767049888248, 130.2171199740178, 159.04398175053177, 
130.70439237623253, 102.67850049352292, 153.47848537250547, 136.55125421961785],
"eval_len": [28, 32, 32, 22, 25, 31, 26, 20, 30, 27]}

 66%|██████▌   | 659998/1000000 [13:34:30<5:05:21, 18.56it/s]global step 660000, trans_decision ep_re 215.91660192419164

{"global_step": 660000, "eval_re": [420.24155397383845, 454.60424669349237, 
113.9851029746436, 281.63429688068027, 130.21491158346603, 158.05480963262696, 
141.57773452895867, 95.91360129657052, 231.12353599063582, 131.81622568700337], 
"eval_len": [77, 99, 22, 53, 25, 30, 27, 19, 44, 26]}

 67%|██████▋   | 669998/1000000 [13:46:50<4:57:40, 18.48it/s]global step 670000, trans_decision ep_re 169.79021517158918

{"global_step": 670000, "eval_re": [191.74391024722127, 146.53207302394807, 
172.6585571568849, 161.22538001959114, 135.18083700398213, 140.19294675203906, 
114.21138454914055, 331.96760618250323, 158.2207312980835, 145.96872548249786], 
"eval_len": [37, 29, 33, 31, 26, 27, 22, 63, 30, 28]}

 68%|██████▊   | 679998/1000000 [13:59:21<4:50:57, 18.33it/s]global step 680000, trans_decision ep_re 149.00959724067405

{"global_step": 680000, "eval_re": [172.33642132752826, 136.7586405449374, 
103.35983545204016, 154.3495929542689, 157.89885666798534, 108.72601646675581, 
150.66090869113427, 201.48765978400766, 137.63226241088728, 166.8857781071957], 
"eval_len": [33, 26, 20, 30, 31, 21, 29, 39, 27, 32]}

 69%|██████▉   | 689998/1000000 [14:11:41<4:38:25, 18.56it/s]global step 690000, trans_decision ep_re 131.7097735652162

{"global_step": 690000, "eval_re": [131.03942308010474, 108.77353350623187, 
117.08956370982007, 119.97784949374436, 191.97827589772842, 128.92109338635098, 
120.53803658171515, 149.97224304125763, 135.12696712291938, 113.68074983228955],
"eval_len": [26, 21, 23, 23, 37, 25, 23, 29, 26, 22]}

 70%|██████▉   | 699998/1000000 [14:24:11<4:30:32, 18.48it/s]global step 700000, trans_decision ep_re 183.79451121097424

{"global_step": 700000, "eval_re": [167.7695650182096, 237.63351000297976, 
102.9243161613024, 181.9564278444808, 155.80193982554366, 151.6412500651789, 
113.96288260549984, 96.49820396809636, 353.3093222468866, 276.44769437156475], 
"eval_len": [32, 45, 20, 36, 30, 29, 22, 19, 68, 58]}

 71%|███████   | 709998/1000000 [14:36:31<4:22:12, 18.43it/s]global step 710000, trans_decision ep_re 170.83530303093855

{"global_step": 710000, "eval_re": [156.73576703414227, 107.97278226196718, 
175.41283914584162, 144.84339433041532, 123.25871936393233, 400.0365990293929, 
146.64305466862652, 168.0113999262668, 129.52068339547478, 155.9177911533256], 
"eval_len": [30, 21, 35, 28, 24, 80, 28, 32, 25, 30]}

 72%|███████▏  | 719998/1000000 [14:49:01<4:13:33, 18.41it/s]global step 720000, trans_decision ep_re 130.28856240363535

{"global_step": 720000, "eval_re": [132.50651166068496, 102.05886721871238, 
167.88616232075347, 168.36690525597498, 108.83875260454593, 132.7031571343583, 
114.42135376218724, 108.29084087969844, 129.106211995462, 138.70686120397568], 
"eval_len": [26, 20, 32, 32, 21, 26, 22, 21, 25, 27]}

 73%|███████▎  | 729998/1000000 [15:01:21<4:04:17, 18.42it/s]global step 730000, trans_decision ep_re 142.5171384190393

{"global_step": 730000, "eval_re": [114.33155667800384, 149.1978175114483, 
140.77646007269988, 147.9733425470139, 160.9860633128897, 171.72025789158084, 
135.82595926426717, 129.13800551488788, 166.68175053401424, 108.54017086358701],
"eval_len": [22, 29, 27, 28, 31, 34, 26, 25, 33, 21]}

 74%|███████▍  | 739998/1000000 [15:13:51<3:55:02, 18.44it/s]global step 740000, trans_decision ep_re 151.23040153236155

{"global_step": 740000, "eval_re": [125.34743408896335, 103.82767368817262, 
164.74744759479165, 158.30951866413662, 140.68958433600625, 284.6768308371553, 
185.6671189833398, 119.12403207031804, 120.78944069132038, 109.12493436941149], 
"eval_len": [24, 20, 32, 30, 27, 55, 36, 23, 23, 21]}

 75%|███████▍  | 749998/1000000 [15:26:11<3:43:56, 18.61it/s]global step 750000, trans_decision ep_re 136.71558985336583

{"global_step": 750000, "eval_re": [102.07392622275418, 114.5547192568019, 
147.53166673691078, 118.6761294544952, 163.20404941303147, 188.8379592705552, 
125.55739154025406, 120.66138766071752, 135.46269584973567, 150.59597312840225],
"eval_len": [20, 22, 28, 23, 31, 36, 24, 23, 26, 29]}

 76%|███████▌  | 759998/1000000 [15:38:41<3:36:50, 18.45it/s]global step 760000, trans_decision ep_re 163.0582294508634

{"global_step": 760000, "eval_re": [161.8488804505442, 129.51828480272468, 
125.00151881342288, 102.1380580630294, 314.5403386387917, 176.32376932656203, 
160.96505238720835, 152.3023728582911, 182.95321698406994, 124.99080218398993], 
"eval_len": [31, 25, 24, 20, 63, 34, 31, 29, 35, 24]}

 77%|███████▋  | 769998/1000000 [15:51:01<3:26:25, 18.57it/s]global step 770000, trans_decision ep_re 145.8999151328158

{"global_step": 770000, "eval_re": [146.4281321828446, 332.17141659491756, 
97.1084646563347, 156.90591609346407, 89.54623902682172, 96.19531191063373, 
149.5358659106958, 164.20037766925384, 108.13002165408048, 118.77740562911164], 
"eval_len": [28, 61, 19, 30, 18, 19, 29, 31, 21, 23]}

 78%|███████▊  | 779998/1000000 [16:03:31<3:18:31, 18.47it/s]global step 780000, trans_decision ep_re 132.64954190503323

{"global_step": 780000, "eval_re": [174.7796793610443, 159.79363899293728, 
97.11903564017624, 103.66016639144557, 115.22278904419555, 125.29759307003106, 
124.63706492595803, 163.73535471556488, 108.71984689395211, 153.5302500150272], 
"eval_len": [34, 31, 19, 20, 22, 24, 24, 31, 21, 29]}

 79%|███████▉  | 789998/1000000 [16:15:51<3:08:31, 18.57it/s]global step 790000, trans_decision ep_re 213.93264503601705

{"global_step": 790000, "eval_re": [101.51904953186778, 124.97336902440765, 
145.87258095580785, 360.2505082148516, 107.72126243758012, 181.31498409392628, 
113.43677968351285, 146.7014062856123, 749.0989892252222, 108.43752090738147], 
"eval_len": [20, 24, 28, 69, 21, 35, 22, 28, 146, 21]}

 80%|███████▉  | 799998/1000000 [16:28:21<3:01:14, 18.39it/s]global step 800000, trans_decision ep_re 132.74587464357103

{"global_step": 800000, "eval_re": [129.30415517798144, 154.99133164761875, 
129.6973189796667, 119.07211400679742, 153.84931797864695, 147.93934690112167, 
134.6323487888979, 103.2177916092239, 118.24496632149621, 136.51005502425943], 
"eval_len": [25, 30, 25, 23, 30, 28, 26, 20, 23, 26]}

 81%|████████  | 809998/1000000 [16:40:41<2:51:22, 18.48it/s]global step 810000, trans_decision ep_re 183.92425490909508

{"global_step": 810000, "eval_re": [199.5275875698443, 178.17666420797084, 
141.47574123724948, 127.5493632793702, 130.29675480891046, 170.39082777295826, 
120.37595009560576, 121.92805982934065, 116.92403538912593, 532.5975649005749], 
"eval_len": [39, 34, 27, 25, 25, 33, 23, 24, 23, 104]}

 82%|████████▏ | 819998/1000000 [16:53:11<2:42:53, 18.42it/s]global step 820000, trans_decision ep_re 174.72369690509422

{"global_step": 820000, "eval_re": [141.2791216105456, 156.9687354049383, 
121.19443481122043, 144.26895739332744, 136.09672115451005, 96.51817910145091, 
168.31018539105207, 146.71197958720552, 510.96571993143823, 124.92293466525372],
"eval_len": [27, 30, 23, 28, 26, 19, 35, 28, 94, 24]}

 83%|████████▎ | 829998/1000000 [17:05:41<2:34:13, 18.37it/s]global step 830000, trans_decision ep_re 162.1392025629337

{"global_step": 830000, "eval_re": [164.0555654371197, 115.25824785002975, 
131.55656287807145, 167.0403957889791, 151.56369287367872, 135.6462942108191, 
129.09337231987288, 136.59090239827074, 382.7903724228099, 107.7966194496855], 
"eval_len": [31, 22, 25, 32, 29, 26, 25, 27, 73, 21]}

 84%|████████▍ | 839998/1000000 [17:18:11<2:25:54, 18.28it/s]global step 840000, trans_decision ep_re 147.00285785196996

{"global_step": 840000, "eval_re": [103.93437899442043, 103.454408330244, 
124.17149148322736, 162.75455333577113, 183.4089109167265, 89.8443420577771, 
144.54757232862673, 339.5457983277771, 102.71986695922864, 115.64725578590051], 
"eval_len": [20, 20, 24, 32, 35, 18, 28, 65, 20, 23]}

 85%|████████▍ | 849998/1000000 [17:30:31<2:14:26, 18.60it/s]global step 850000, trans_decision ep_re 189.4003652584332

{"global_step": 850000, "eval_re": [626.6652102372091, 197.70668929149278, 
102.86582054317819, 89.64077140809451, 124.0220273226586, 294.135050156669, 
158.27915208079307, 101.35468203452066, 103.10356733270041, 96.23068217701571], 
"eval_len": [124, 38, 20, 18, 24, 57, 30, 20, 20, 19]}

 86%|████████▌ | 859998/1000000 [17:43:01<2:06:42, 18.42it/s]global step 860000, trans_decision ep_re 175.12391685264456

{"global_step": 860000, "eval_re": [178.637861139136, 261.5297753103982, 
97.29673920991726, 181.63620573432024, 119.27478684485709, 283.1641127351672, 
156.81770239411603, 180.7263118869505, 155.1246360447035, 137.03103722687965], 
"eval_len": [35, 52, 19, 37, 23, 57, 30, 35, 30, 27]}

 87%|████████▋ | 869998/1000000 [17:55:21<1:57:35, 18.43it/s]global step 870000, trans_decision ep_re 158.87727514438856

{"global_step": 870000, "eval_re": [149.1835015406554, 151.82988416746613, 
125.71032322254405, 96.81221964217455, 153.00258861521266, 170.7621409677436, 
119.61994741836384, 120.19386027601401, 139.4908052940606, 362.1674802996505], 
"eval_len": [28, 30, 24, 19, 29, 33, 23, 23, 27, 68]}

 88%|████████▊ | 879998/1000000 [18:07:51<1:49:21, 18.29it/s]global step 880000, trans_decision ep_re 131.9782617356902

{"global_step": 880000, "eval_re": [147.27001438944126, 151.0551742398966, 
130.45696584943425, 140.76153244409667, 125.5257530510209, 156.02426481048596, 
103.28739058834806, 133.60607040065412, 96.19732504273779, 135.5981265407866], 
"eval_len": [28, 29, 25, 27, 24, 30, 20, 26, 19, 26]}

 89%|████████▉ | 889998/1000000 [18:20:21<1:39:29, 18.43it/s]global step 890000, trans_decision ep_re 184.4622106331019

{"global_step": 890000, "eval_re": [213.02065193066082, 165.55994392372483, 
147.62184436635934, 169.64204431713893, 149.86196623877635, 154.59329979449348, 
150.06918051748272, 184.91913289100546, 300.85955468974527, 208.47448766163194],
"eval_len": [41, 33, 28, 35, 29, 30, 29, 35, 58, 39]}

 90%|████████▉ | 899998/1000000 [18:32:41<1:29:41, 18.58it/s]global step 900000, trans_decision ep_re 154.59227743517837

{"global_step": 900000, "eval_re": [108.11241067734673, 220.98761250409152, 
174.05129538369096, 214.2874605616895, 114.85317657344761, 113.6771831075062, 
140.14223685349538, 156.34447669505457, 195.8411956161784, 107.62572637928307], 
"eval_len": [21, 43, 34, 41, 22, 22, 28, 30, 39, 21]}

 91%|█████████ | 909998/1000000 [18:45:01<1:22:06, 18.27it/s]global step 910000, trans_decision ep_re 150.12243165399732

{"global_step": 910000, "eval_re": [151.4120904928877, 168.16177040970283, 
137.69534282607816, 142.60080908669977, 191.21081680753082, 90.95703823134568, 
90.18022938110997, 230.92795704270063, 128.63154092739038, 169.44672133452707], 
"eval_len": [29, 33, 27, 28, 37, 18, 18, 44, 25, 35]}

 92%|█████████▏| 919998/1000000 [18:57:21<1:12:03, 18.50it/s]global step 920000, trans_decision ep_re 170.86583280246205

{"global_step": 920000, "eval_re": [479.40195647448286, 125.57430920424378, 
146.61376896400228, 112.64220440237959, 150.7936533416106, 97.22394359567775, 
188.45158608847362, 95.98607915452874, 163.2910249104416, 148.67980188877993], 
"eval_len": [101, 24, 28, 22, 29, 19, 36, 19, 31, 29]}

 93%|█████████▎| 929998/1000000 [19:09:41<1:02:36, 18.64it/s]global step 930000, trans_decision ep_re 124.16178560931107

{"global_step": 930000, "eval_re": [96.84932448066726, 154.1554476763107, 
118.95737902787315, 146.00463468820814, 109.19963305521367, 149.8533825965424, 
124.43187751508724, 113.77773728078787, 109.23681923441451, 119.15162053800567],
"eval_len": [19, 30, 23, 28, 21, 29, 24, 22, 21, 23]}

 94%|█████████▍| 939998/1000000 [19:22:01<54:22, 18.39it/s]global step 940000, trans_decision ep_re 153.77124598561403

{"global_step": 940000, "eval_re": [329.03812867477683, 171.57769597318176, 
146.9020985614836, 101.73642859412895, 102.55311978982763, 125.42065508316853, 
177.0374467785317, 147.121377451931, 139.08871722538726, 97.23679172372316], 
"eval_len": [63, 33, 28, 20, 20, 24, 35, 28, 27, 19]}

 95%|█████████▍| 949998/1000000 [19:34:31<45:18, 18.39it/s]global step 950000, trans_decision ep_re 162.42632150877725

{"global_step": 950000, "eval_re": [114.47813298422967, 162.18914273906404, 
172.33438834469342, 142.91781253651482, 120.76501740955824, 203.35515722467724, 
368.487586468627, 107.38444398877995, 107.60178622652396, 124.74974716510431], 
"eval_len": [22, 31, 33, 27, 23, 39, 71, 21, 21, 24]}

 96%|█████████▌| 959998/1000000 [19:47:01<35:53, 18.58it/s]global step 960000, trans_decision ep_re 188.180872447338

{"global_step": 960000, "eval_re": [483.733336916765, 278.268402703296, 
108.14149979687893, 157.2234322602607, 155.44168527886103, 141.36154176808805, 
173.81782350924058, 140.04555732970243, 107.9158778963271, 135.8595670139601], 
"eval_len": [102, 52, 21, 30, 30, 27, 33, 27, 21, 26]}

 97%|█████████▋| 969998/1000000 [19:59:21<27:03, 18.48it/s]global step 970000, trans_decision ep_re 148.20476045470258

{"global_step": 970000, "eval_re": [146.9910284078546, 131.95629811808354, 
130.6825116759325, 129.06212607538873, 185.75401282879204, 160.1466949273911, 
190.1275427055895, 181.15657905641714, 129.62234921751508, 96.54846153406187], 
"eval_len": [29, 25, 25, 25, 36, 31, 37, 35, 25, 19]}

 98%|█████████▊| 979998/1000000 [20:11:51<18:00, 18.51it/s]global step 980000, trans_decision ep_re 173.9775611519106

{"global_step": 980000, "eval_re": [96.30708551117613, 113.56460607712546, 
144.05985069025354, 211.2889287957714, 145.28349617417265, 209.07936563474013, 
113.50997272517886, 131.35402108339147, 474.444828742957, 100.8834560843394], 
"eval_len": [19, 22, 28, 41, 28, 41, 22, 25, 90, 20]}

 99%|█████████▉| 989998/1000000 [20:24:21<09:03, 18.42it/s]global step 990000, trans_decision ep_re 157.7698450306827

{"global_step": 990000, "eval_re": [124.17428858916115, 113.87848239275034, 
184.06364092534972, 161.78849740586756, 194.04093503010353, 126.41196646997408, 
129.98542569650434, 157.49540761520026, 255.87447043044608, 129.98533575147007],
"eval_len": [24, 22, 36, 31, 37, 24, 25, 30, 49, 25]}

100%|█████████▉| 999998/1000000 [20:36:41<00:00, 18.49it/s]global step 1000000, trans_decision ep_re 199.01755156037768

{"global_step": 1000000, "eval_re": [141.7379215906948, 300.2323753560703, 
194.6808219657716, 118.58280663919535, 421.0030647785428, 122.61078944572564, 
128.66209524908956, 123.69808155203813, 318.7136070181072, 120.25395200854135], 
"eval_len": [27, 58, 37, 23, 81, 24, 25, 24, 59, 23]}

100%|██████████| 1000000/1000000 [20:36:50<00:00, 13.48it/s]
