
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.05
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9998/1000000 [05:00<12:14:43, 22.46it/s]global step 10000, trans_decision ep_re 243.9894534293572

{"global_step": 10000, "eval_re": [120.14892682035975, 177.64401023989788, 
146.16200095039946, 130.4923394168105, 325.50533966271075, 336.2232209483299, 
291.1042908084233, 293.95621618069697, 286.13474133315947, 332.52344793278394], 
"eval_len": [23, 34, 28, 25, 62, 63, 56, 57, 55, 64]}

  2%|▏         | 19997/1000000 [15:30<12:21:36, 22.02it/s]global step 20000, trans_decision ep_re 293.63906144226866

{"global_step": 20000, "eval_re": [321.06294718338665, 458.17636936526617, 
135.99169228400817, 369.83172546673177, 347.350926348812, 160.81758965059015, 
329.2404710858518, 155.80896275742143, 457.8486388389264, 200.26129144169244], 
"eval_len": [61, 81, 26, 72, 65, 31, 65, 30, 83, 38]}

  3%|▎         | 29999/1000000 [25:20<12:11:33, 22.10it/s]global step 30000, trans_decision ep_re 224.8017217033163

{"global_step": 30000, "eval_re": [130.91226212636167, 286.02755643096634, 
378.3891659489465, 140.25482475807686, 283.6374663884456, 187.56355203018703, 
192.07486761259128, 329.7505089447111, 173.40049960048793, 146.0065131923884], 
"eval_len": [25, 56, 73, 27, 55, 36, 37, 64, 33, 28]}

  4%|▍         | 39999/1000000 [35:40<12:02:09, 22.16it/s]global step 40000, trans_decision ep_re 326.1348023098853

{"global_step": 40000, "eval_re": [410.53309995471784, 320.7781258947606, 
405.6691308591155, 380.3130838349608, 312.9532661255257, 183.0158462499878, 
366.65816080244434, 324.309547332069, 362.93857649847143, 194.17918554680006], 
"eval_len": [75, 62, 75, 70, 59, 35, 68, 62, 68, 37]}

  5%|▍         | 49999/1000000 [45:50<11:59:02, 22.02it/s]global step 50000, trans_decision ep_re 252.94581738193924

{"global_step": 50000, "eval_re": [387.3049922485462, 307.76498113496467, 
290.2122267098513, 130.39569947110792, 311.5619099835548, 160.633025578035, 
281.23003277648877, 318.49957741891313, 189.74955057630075, 152.10617792163026],
"eval_len": [72, 60, 56, 25, 61, 31, 56, 61, 36, 29]}

  6%|▌         | 59999/1000000 [56:00<11:49:47, 22.07it/s]global step 60000, trans_decision ep_re 243.00485704261934

{"global_step": 60000, "eval_re": [188.51702476481861, 335.2974220501759, 
146.1115843112122, 172.868849217067, 146.44035800762185, 418.5246486453333, 
183.33929356984763, 383.43337976640044, 125.15105976573862, 330.36495032797797],
"eval_len": [36, 63, 28, 33, 28, 77, 35, 72, 24, 62]}

  7%|▋         | 69999/1000000 [1:06:10<11:41:13, 22.10it/s]global step 70000, trans_decision ep_re 310.1324131928417

{"global_step": 70000, "eval_re": [287.2631418116916, 421.26312717275033, 
372.13471593455233, 140.8731419678114, 303.0522964114344, 284.2446622194814, 
280.88395223124246, 341.62127263459246, 343.85435093024364, 326.13347061461667],
"eval_len": [55, 77, 70, 27, 58, 56, 54, 64, 65, 62]}

  8%|▊         | 79998/1000000 [1:16:20<11:23:22, 22.44it/s]global step 80000, trans_decision ep_re 285.4483420712771

{"global_step": 80000, "eval_re": [293.2332317737481, 368.94030745003204, 
348.534186935746, 223.38207573170183, 156.99852788339552, 393.9657454348612, 
408.55620208702663, 146.5153459162332, 141.15391297535308, 373.2038845246735], 
"eval_len": [57, 69, 64, 42, 30, 72, 76, 28, 27, 68]}

  9%|▉         | 89999/1000000 [1:26:30<11:29:30, 22.00it/s]global step 90000, trans_decision ep_re 291.61687687445175

{"global_step": 90000, "eval_re": [431.1311433708999, 372.7680781540703, 
151.31228166412552, 135.5465371414982, 393.1458902870399, 135.87509528249245, 
376.9296844885937, 287.767020036943, 301.77931962412146, 329.91371869473323], 
"eval_len": [79, 69, 29, 26, 73, 26, 71, 55, 59, 61]}

 10%|▉         | 99999/1000000 [1:36:30<11:15:15, 22.21it/s]global step 100000, trans_decision ep_re 318.0817598754485

{"global_step": 100000, "eval_re": [319.99006417509753, 177.31985924573885, 
353.8592937529877, 445.5237364039575, 396.2882336296297, 330.3336264593626, 
304.59699008739324, 303.4791043193471, 155.97440302085323, 393.4522876601178], 
"eval_len": [61, 34, 66, 80, 73, 62, 59, 59, 30, 74]}

 11%|█         | 109998/1000000 [1:46:41<11:03:36, 22.35it/s]global step 110000, trans_decision ep_re 324.5618215542743

{"global_step": 110000, "eval_re": [140.5828165506571, 347.5520456881824, 
377.6722230631125, 409.5879347297586, 439.8883392914993, 426.80138259706666, 
183.03309057358027, 141.14221752492915, 381.109862076048, 398.2483034479092], 
"eval_len": [27, 65, 70, 74, 80, 77, 35, 27, 71, 73]}

 12%|█▏        | 119999/1000000 [1:57:10<11:05:53, 22.03it/s]global step 120000, trans_decision ep_re 299.1791969580751

{"global_step": 120000, "eval_re": [146.33088341708478, 490.6139153014213, 
417.81432984866825, 151.51139195631802, 152.1904591411663, 398.07230401275297, 
355.2595442695083, 119.87900250406932, 388.20886666851754, 371.911272461244], 
"eval_len": [28, 91, 76, 29, 29, 73, 68, 23, 72, 68]}

 13%|█▎        | 129999/1000000 [2:07:20<10:57:11, 22.06it/s]global step 130000, trans_decision ep_re 333.754089382987

{"global_step": 130000, "eval_re": [423.4095140453828, 361.9355344455283, 
435.72356724899294, 156.54312605348736, 281.4257309554667, 372.8366537600784, 
358.85306365435616, 426.7645921489817, 343.8963412741854, 176.15277024341], 
"eval_len": [78, 68, 81, 30, 55, 69, 67, 79, 64, 34]}

 14%|█▍        | 139997/1000000 [2:17:40<10:56:02, 21.85it/s]global step 140000, trans_decision ep_re 309.0961596776546

{"global_step": 140000, "eval_re": [388.70191716281545, 431.88824646782376, 
356.4867624573729, 386.3699463345865, 167.7566762468102, 151.20050687491053, 
157.09207873539486, 462.8124570044805, 172.92760290357091, 415.7254025887805], 
"eval_len": [73, 79, 65, 72, 32, 29, 30, 85, 33, 77]}

 15%|█▍        | 149999/1000000 [2:27:50<10:40:37, 22.11it/s]global step 150000, trans_decision ep_re 326.1637620643519

{"global_step": 150000, "eval_re": [225.85541629934073, 374.04699132402135, 
386.5141143305531, 394.319397264964, 431.7449433755885, 354.4876085310616, 
237.37894158587932, 346.8304772061995, 141.29192686527819, 369.16780386063306], 
"eval_len": [43, 70, 70, 71, 80, 66, 45, 65, 27, 69]}

 16%|█▌        | 159999/1000000 [2:37:51<10:32:07, 22.15it/s]global step 160000, trans_decision ep_re 271.0009094503592

{"global_step": 160000, "eval_re": [331.39136549982163, 140.75056571476583, 
146.618411022387, 172.1849916710534, 199.10192648635638, 377.634821981405, 
396.0047120752734, 371.6413018105559, 135.56905851756522, 439.1119397244084], 
"eval_len": [62, 27, 28, 33, 38, 70, 73, 70, 26, 81]}

 17%|█▋        | 169999/1000000 [2:48:20<10:24:40, 22.15it/s]global step 170000, trans_decision ep_re 335.1885360150919

{"global_step": 170000, "eval_re": [155.30976467691735, 245.16670360831975, 
420.2206509708299, 361.0504574804426, 358.05816467565523, 396.12787400290455, 
424.23150688238536, 433.00657824178097, 407.30935517785065, 151.40430443383286],
"eval_len": [30, 46, 78, 66, 67, 73, 79, 78, 75, 29]}

 18%|█▊        | 179999/1000000 [2:58:30<10:15:45, 22.19it/s]global step 180000, trans_decision ep_re 279.0383421287674

{"global_step": 180000, "eval_re": [349.586173485405, 238.23065578362915, 
130.88467967587968, 355.3568826914068, 483.98879652278276, 401.8013547191335, 
135.8427490470211, 364.1214485709867, 130.52839444910268, 200.04228634232683], 
"eval_len": [66, 45, 25, 67, 89, 73, 26, 69, 25, 38]}

 19%|█▉        | 189997/1000000 [3:08:50<10:10:34, 22.11it/s]global step 190000, trans_decision ep_re 252.1980407172295

{"global_step": 190000, "eval_re": [167.1658868241161, 314.4960998592196, 
151.1415704350312, 151.0302786515697, 151.57771683410036, 140.27220190592763, 
300.40910164205457, 531.2322531696352, 452.4690582551225, 162.1862395955183], 
"eval_len": [32, 61, 29, 29, 29, 27, 58, 96, 84, 31]}

 20%|█▉        | 199999/1000000 [3:19:00<10:04:40, 22.05it/s]global step 200000, trans_decision ep_re 228.9151079318379

{"global_step": 200000, "eval_re": [364.30123371791586, 166.42561005306908, 
145.99621594286475, 377.8849778282762, 400.6994894045369, 130.93289065655964, 
150.84735994100552, 156.69373401569564, 182.6105808205851, 212.75898693786988], 
"eval_len": [71, 32, 28, 71, 74, 25, 29, 30, 35, 41]}

 21%|██        | 209999/1000000 [3:29:00<9:57:39, 22.03it/s]global step 210000, trans_decision ep_re 286.4857810790189

{"global_step": 210000, "eval_re": [176.97046544648842, 140.9332235632425, 
151.0698254527436, 329.7666036992452, 156.79533935114853, 353.55321009681865, 
552.6152980120811, 356.3957958164299, 441.84352064596226, 204.91452870602936], 
"eval_len": [34, 27, 29, 62, 30, 67, 99, 66, 82, 39]}

 22%|██▏       | 219999/1000000 [3:39:30<9:46:57, 22.15it/s]global step 220000, trans_decision ep_re 324.2604520479638

{"global_step": 220000, "eval_re": [339.8479539890098, 161.0600852204731, 
421.0206615286581, 177.635844957197, 390.8229885212912, 407.7452405899787, 
397.85710421192283, 406.71557776241826, 151.10130639105913, 388.7977573076294], 
"eval_len": [63, 31, 79, 34, 72, 73, 74, 76, 29, 72]}

 23%|██▎       | 229999/1000000 [3:49:40<9:33:32, 22.38it/s]global step 230000, trans_decision ep_re 314.92235110362924

{"global_step": 230000, "eval_re": [150.97730217462532, 437.6840128502253, 
343.4509211845324, 150.84788695049136, 290.4706129632643, 389.4006410860969, 
178.3429087775789, 352.5571619515434, 491.8653256949187, 363.6267374030163], 
"eval_len": [29, 79, 65, 29, 57, 74, 34, 67, 89, 68]}

 24%|██▍       | 239999/1000000 [3:59:50<9:31:34, 22.16it/s]global step 240000, trans_decision ep_re 235.2094440814828

{"global_step": 240000, "eval_re": [447.065706864679, 182.0108730002449, 
445.21592351683563, 141.1629179268794, 135.63787966154268, 194.19975451819818, 
173.17523500008045, 141.239987068906, 151.33697440376724, 341.04918885369426], 
"eval_len": [80, 35, 81, 27, 26, 37, 33, 27, 29, 64]}

 25%|██▍       | 249999/1000000 [4:10:00<9:19:14, 22.35it/s]global step 250000, trans_decision ep_re 252.95235733415106

{"global_step": 250000, "eval_re": [461.6033594096644, 140.70000373581328, 
188.40464009262496, 214.7098037067953, 146.04000415522592, 413.667912996541, 
150.79774800107293, 341.9847755671248, 335.5981570659624, 136.01716861068564], 
"eval_len": [84, 27, 36, 41, 28, 75, 29, 63, 63, 26]}

 26%|██▌       | 259999/1000000 [4:20:10<9:16:16, 22.17it/s]global step 260000, trans_decision ep_re 341.6302931817068

{"global_step": 260000, "eval_re": [316.688982586666, 140.31472208007793, 
145.4813567019222, 423.518839935324, 677.2829299692416, 353.82680130594497, 
348.43582315226655, 370.06123269227214, 278.4145950211615, 362.2776483721908], 
"eval_len": [60, 27, 28, 77, 126, 66, 68, 71, 54, 68]}

 27%|██▋       | 269999/1000000 [4:30:10<9:06:05, 22.28it/s]global step 270000, trans_decision ep_re 253.1029206675724

{"global_step": 270000, "eval_re": [156.47508096008806, 130.85125370965767, 
375.10120175757754, 406.5692483888229, 442.9312420516318, 167.00476039271226, 
151.1176663749614, 408.5279690510636, 167.25648308085113, 125.1943009083575], 
"eval_len": [30, 25, 68, 75, 80, 32, 29, 75, 32, 24]}

 28%|██▊       | 279999/1000000 [4:40:20<9:01:42, 22.15it/s]global step 280000, trans_decision ep_re 289.64897010262933

{"global_step": 280000, "eval_re": [204.66112267596668, 433.78036431760484, 
175.91956247588757, 193.81990816188085, 381.21498946876176, 173.45131637379345, 
357.5189332537848, 145.9549925928977, 496.31756751703415, 333.85094418868175], 
"eval_len": [39, 80, 34, 37, 70, 33, 67, 28, 94, 66]}

 29%|██▉       | 289999/1000000 [4:50:30<8:54:52, 22.12it/s]global step 290000, trans_decision ep_re 227.1033581817739

{"global_step": 290000, "eval_re": [140.07860147900195, 357.244699170503, 
166.24085122073956, 193.74640538291223, 173.17152103235256, 412.4865839639784, 
363.65731327801586, 135.99717919373023, 140.47139075640453, 187.93903634010056],
"eval_len": [27, 67, 32, 37, 33, 75, 68, 26, 27, 36]}

 30%|██▉       | 299999/1000000 [5:00:40<8:43:43, 22.28it/s]global step 300000, trans_decision ep_re 238.32586787886947

{"global_step": 300000, "eval_re": [172.88848071951833, 412.72538029054584, 
324.8227565672369, 173.39687771189202, 348.451294490214, 130.58523057336564, 
145.84809965202902, 173.66136610211822, 323.22998974436535, 177.64920293740923],
"eval_len": [33, 75, 61, 33, 67, 25, 28, 33, 63, 34]}

 31%|███       | 309999/1000000 [5:10:50<8:33:45, 22.38it/s]global step 310000, trans_decision ep_re 243.7959678342823

{"global_step": 310000, "eval_re": [140.69865130033565, 281.9199406508227, 
289.90970903699747, 136.05095681850608, 136.21597594706788, 145.9000152325613, 
649.3564971883575, 146.0472832768845, 146.07850422649216, 365.78214466479784], 
"eval_len": [27, 55, 56, 26, 26, 28, 117, 28, 28, 70]}

 32%|███▏      | 319999/1000000 [5:21:00<8:28:36, 22.28it/s]global step 320000, trans_decision ep_re 218.81970956847323

{"global_step": 320000, "eval_re": [151.30296053493035, 270.2569937314017, 
346.0089484019796, 151.62317666248083, 281.2225325518893, 177.76707220316064, 
161.0087382700773, 341.1264628782489, 161.86771236885048, 146.01249808171312], 
"eval_len": [29, 53, 66, 29, 55, 34, 31, 64, 31, 28]}

 33%|███▎      | 329999/1000000 [5:31:20<8:19:15, 22.37it/s]global step 330000, trans_decision ep_re 227.18630760110736

{"global_step": 330000, "eval_re": [167.59185195439804, 162.3628353308423, 
125.46482765929198, 281.5122517223251, 178.00317225827382, 166.5244139092017, 
130.65767933432298, 372.00710591533823, 330.0923269853119, 357.64661094176745], 
"eval_len": [32, 31, 24, 55, 34, 32, 25, 69, 64, 66]}

 34%|███▍      | 339999/1000000 [5:41:30<8:17:38, 22.10it/s]global step 340000, trans_decision ep_re 267.085620806728

{"global_step": 340000, "eval_re": [408.0145535538681, 356.2836918415554, 
165.76845848568118, 146.67485717852688, 260.4516161662444, 188.2580597863762, 
161.59942760278847, 222.4419415347853, 418.42869085794166, 342.93491105951284], 
"eval_len": [73, 67, 32, 28, 49, 36, 31, 42, 77, 64]}

 35%|███▍      | 349999/1000000 [5:51:40<8:11:11, 22.06it/s]global step 350000, trans_decision ep_re 227.31303253770201

{"global_step": 350000, "eval_re": [187.85855364720067, 136.15094575125414, 
146.39336290286693, 333.2903831465554, 135.54528833037048, 476.5453671862291, 
199.4873956980381, 386.0587681005179, 125.55791270816302, 146.24234790582452], 
"eval_len": [36, 26, 28, 64, 26, 88, 38, 71, 24, 28]}

 36%|███▌      | 359999/1000000 [6:01:50<7:59:41, 22.24it/s]global step 360000, trans_decision ep_re 228.7294649046509

{"global_step": 360000, "eval_re": [372.63612076173723, 304.742712817832, 
167.4261021500846, 141.11521710631928, 140.8177639456443, 178.2468974639402, 
335.9622642707081, 145.80326969376208, 323.74303052647605, 176.80127031000512], 
"eval_len": [69, 58, 32, 27, 27, 34, 64, 28, 60, 34]}

 37%|███▋      | 369999/1000000 [6:12:00<7:54:22, 22.13it/s]global step 370000, trans_decision ep_re 263.4673080704621

{"global_step": 370000, "eval_re": [290.96279755169405, 146.27676541540515, 
130.33952045235682, 446.17656711091803, 297.39436470449334, 261.9185251675804, 
151.75001541799043, 305.88168412126714, 300.274264851839, 303.6985759110756], 
"eval_len": [56, 28, 25, 81, 57, 51, 29, 59, 59, 58]}

 38%|███▊      | 379999/1000000 [6:22:00<7:40:01, 22.46it/s]global step 380000, trans_decision ep_re 413.79873640943543

{"global_step": 380000, "eval_re": [141.39678243631604, 705.0161286739914, 
917.7143662737791, 178.1230209348618, 734.889268484879, 677.3518077549284, 
130.60274972681046, 188.20766811729112, 130.88712919726777, 333.79844249422945],
"eval_len": [27, 128, 164, 34, 131, 134, 25, 36, 25, 64]}

 39%|███▉      | 389997/1000000 [6:32:30<7:37:59, 22.20it/s]global step 390000, trans_decision ep_re 338.58286665236636

{"global_step": 390000, "eval_re": [439.374342053621, 410.45386119208763, 
173.3858162701011, 141.10280066616914, 370.4187557673213, 591.1129915056504, 
167.28186737533213, 332.2235367547805, 321.185896025606, 439.2887989129946], 
"eval_len": [80, 78, 33, 27, 67, 105, 32, 64, 60, 80]}

 40%|███▉      | 399999/1000000 [6:42:40<7:30:25, 22.20it/s]global step 400000, trans_decision ep_re 284.9981266506817

{"global_step": 400000, "eval_re": [193.49401804640257, 340.90536486060853, 
151.7545387994465, 372.54383907516467, 496.54460466750567, 342.6339459412703, 
315.828785939354, 150.9738698568037, 155.82723798418888, 329.4750613360717], 
"eval_len": [37, 62, 29, 68, 91, 62, 61, 29, 30, 62]}

 41%|████      | 409998/1000000 [6:52:50<7:18:58, 22.40it/s]global step 410000, trans_decision ep_re 325.1682710092589

{"global_step": 410000, "eval_re": [236.01467333608193, 146.81057518875988, 
607.1985438599235, 177.99681994785905, 346.0070254372269, 157.43264943410037, 
345.5297757121187, 292.0618113219121, 231.56632353204677, 711.06451232256], 
"eval_len": [45, 28, 111, 34, 64, 30, 65, 57, 44, 145]}

 42%|████▏     | 419998/1000000 [7:03:00<7:11:47, 22.39it/s]global step 420000, trans_decision ep_re 256.99789896313155

{"global_step": 420000, "eval_re": [333.4430039443176, 157.27465345401657, 
291.47752528934404, 304.13936684498407, 345.2022370806446, 286.76444136861784, 
141.26033193695443, 151.9946933758861, 348.95954603314607, 209.46319030340416], 
"eval_len": [63, 30, 56, 58, 66, 55, 27, 29, 67, 40]}

 43%|████▎     | 429998/1000000 [7:13:10<7:04:42, 22.37it/s]global step 430000, trans_decision ep_re 253.3067077323505

{"global_step": 430000, "eval_re": [329.0522534613051, 319.3062587137886, 
136.03476459003718, 156.3569451136785, 297.0760388265964, 454.8663655554678, 
166.88051256469214, 332.800107217919, 146.36194840049126, 194.33188287952888], 
"eval_len": [63, 62, 26, 30, 58, 82, 32, 65, 28, 37]}

 44%|████▍     | 439998/1000000 [7:23:20<6:53:45, 22.56it/s]global step 440000, trans_decision ep_re 237.09279451741372

{"global_step": 440000, "eval_re": [316.54007087131214, 167.55164385456877, 
141.60856960107796, 141.01471358936567, 465.39798534709274, 468.8666290130259, 
177.5525419905625, 174.07654696943575, 141.30207166942273, 177.01717226827301], 
"eval_len": [61, 32, 27, 27, 83, 85, 34, 33, 27, 34]}

 45%|████▍     | 449998/1000000 [7:33:30<6:49:25, 22.39it/s]global step 450000, trans_decision ep_re 249.25789566165503

{"global_step": 450000, "eval_re": [140.54611056786862, 306.307432920073, 
156.66394954061417, 370.3489060632244, 293.72387440671804, 125.83010702018137, 
162.46553340040776, 327.37043805249846, 308.6380564384572, 300.68454820650715], 
"eval_len": [27, 60, 30, 69, 57, 24, 31, 61, 58, 58]}

 46%|████▌     | 459998/1000000 [7:43:40<6:41:39, 22.41it/s]global step 460000, trans_decision ep_re 291.543930838014

{"global_step": 460000, "eval_re": [151.14520562642508, 540.6674617655779, 
162.3239569940114, 166.9390355998122, 329.12443849719807, 208.9639880313294, 
374.84926193121476, 437.5377102489698, 141.33756428833027, 402.5506853972713], 
"eval_len": [29, 96, 31, 32, 64, 40, 69, 80, 27, 75]}

 47%|████▋     | 469997/1000000 [7:53:50<6:39:09, 22.13it/s]global step 470000, trans_decision ep_re 289.27490244199765

{"global_step": 470000, "eval_re": [442.47512645261713, 397.0957339363497, 
130.5402374989773, 125.07632193267882, 339.87953783220036, 357.07488899812694, 
315.22106225906117, 412.89632102250357, 157.1748719437706, 215.31492254369104], 
"eval_len": [81, 74, 25, 24, 65, 66, 61, 75, 30, 41]}

 48%|████▊     | 479997/1000000 [8:04:00<6:33:08, 22.04it/s]global step 480000, trans_decision ep_re 352.22015910104204

{"global_step": 480000, "eval_re": [146.32821233036037, 351.68355514015707, 
401.4918255054337, 188.22734683124813, 188.30374996471224, 326.2090567466127, 
389.0478129041296, 157.00617882556776, 457.0734585423185, 916.8303942198803], 
"eval_len": [28, 66, 74, 36, 36, 62, 72, 30, 82, 181]}

 49%|████▉     | 489997/1000000 [8:14:10<6:25:46, 22.03it/s]global step 490000, trans_decision ep_re 259.3844826728239

{"global_step": 490000, "eval_re": [325.089074300478, 136.34323923174537, 
215.11052643907675, 177.90965528735714, 413.32760682233874, 417.6587243111818, 
200.11343822051742, 373.9005390863007, 167.2817834842387, 167.11023954500428], 
"eval_len": [62, 26, 41, 34, 75, 76, 38, 69, 32, 32]}

 50%|████▉     | 499998/1000000 [8:24:20<6:12:54, 22.35it/s]global step 500000, trans_decision ep_re 276.8643319117238

{"global_step": 500000, "eval_re": [246.32246175491375, 151.99274328046087, 
655.9953847218462, 236.5292966350165, 151.40501627769515, 242.08710142221048, 
327.6085389834445, 340.722543053722, 151.93323943194733, 264.0469935559812], 
"eval_len": [47, 29, 129, 45, 29, 46, 63, 64, 29, 50]}

 51%|█████     | 509997/1000000 [8:34:30<6:12:21, 21.93it/s]global step 510000, trans_decision ep_re 271.5014309043771

{"global_step": 510000, "eval_re": [241.27620571960472, 350.5393214904523, 
238.12551219092268, 156.80058653025378, 245.74548264649925, 317.7800282693614, 
192.9214946282176, 326.33960276385056, 209.3420723889085, 436.1440024157007], 
"eval_len": [46, 64, 45, 30, 47, 61, 37, 63, 40, 79]}

 52%|█████▏    | 519999/1000000 [8:44:40<6:02:18, 22.08it/s]global step 520000, trans_decision ep_re 369.14622048353374

{"global_step": 520000, "eval_re": [309.5262530278859, 419.96397763342446, 
894.626408325398, 162.52157322874996, 421.1163765455503, 422.66854904795474, 
253.8644981397137, 343.3819843746003, 306.91242008194763, 156.8801644301129], 
"eval_len": [60, 78, 167, 31, 77, 77, 48, 65, 60, 30]}

 53%|█████▎    | 529999/1000000 [8:54:40<5:52:38, 22.21it/s]global step 530000, trans_decision ep_re 285.868605546962

{"global_step": 530000, "eval_re": [366.5857723489246, 140.61656817161258, 
205.13135694595573, 321.5281587989099, 352.5830949689956, 130.8463524204297, 
290.91364137158195, 273.26152392733286, 146.45271885315563, 630.7668676627213], 
"eval_len": [69, 27, 39, 61, 66, 25, 56, 52, 28, 116]}

 54%|█████▍    | 539997/1000000 [9:04:52<5:48:25, 22.00it/s]global step 540000, trans_decision ep_re 210.57772130270345

{"global_step": 540000, "eval_re": [135.40346295266104, 150.98398477299543, 
135.99629189854403, 188.64503065075723, 194.58064366552438, 168.03591513546837, 
276.0091920114546, 295.55426047727394, 349.5421157733808, 211.026315688975], 
"eval_len": [26, 29, 26, 36, 37, 32, 53, 57, 65, 40]}

 55%|█████▍    | 549999/1000000 [9:15:20<5:41:13, 21.98it/s]global step 550000, trans_decision ep_re 453.1949110360053

{"global_step": 550000, "eval_re": [363.5960624796717, 130.8145079657224, 
902.2344957443169, 280.5855945013626, 653.1685153212343, 216.1270301777649, 
717.9815848774324, 177.45871007994106, 680.0556267337407, 409.926982478866], 
"eval_len": [68, 25, 158, 54, 122, 41, 137, 34, 124, 75]}

 56%|█████▌    | 559999/1000000 [9:25:30<5:31:59, 22.09it/s]global step 560000, trans_decision ep_re 293.56315098825104

{"global_step": 560000, "eval_re": [125.66419128483003, 351.21061966220896, 
141.38461138735113, 156.13633147291878, 335.2770414526343, 320.20693980079426, 
443.92811112340945, 219.95087722721198, 721.8538943588931, 120.0188921122584], 
"eval_len": [24, 67, 27, 30, 64, 60, 81, 42, 137, 23]}

 57%|█████▋    | 569998/1000000 [9:35:31<5:18:16, 22.52it/s]global step 570000, trans_decision ep_re 195.79706575353092

{"global_step": 570000, "eval_re": [146.55634573920102, 120.01717645648601, 
177.0440577528387, 296.6194224873977, 297.9369876961414, 157.21500949232345, 
151.25610532021, 163.09944150985902, 302.3849274014857, 145.84118367936634], 
"eval_len": [28, 23, 34, 57, 57, 30, 29, 31, 58, 28]}

 58%|█████▊    | 579999/1000000 [9:45:40<5:14:12, 22.28it/s]global step 580000, trans_decision ep_re 284.1006931469049

{"global_step": 580000, "eval_re": [385.0785681969239, 188.58543835190605, 
309.69306871575986, 187.69946336082705, 370.1761416558523, 233.24734714163435, 
361.3380092033141, 343.47369955879446, 130.74372421882202, 330.9714710652147], 
"eval_len": [71, 36, 60, 36, 68, 44, 68, 64, 25, 64]}

 59%|█████▉    | 589999/1000000 [9:56:00<5:06:57, 22.26it/s]global step 590000, trans_decision ep_re 454.88711756399243

{"global_step": 590000, "eval_re": [747.2983246127083, 266.7837836697722, 
378.3690376538048, 635.1440761356557, 596.0527822468304, 387.23803323617403, 
307.99046366526846, 313.6765804648756, 572.6860050749667, 343.63208887986866], 
"eval_len": [138, 50, 70, 112, 110, 72, 59, 61, 109, 65]}

 60%|█████▉    | 599999/1000000 [10:06:01<5:01:00, 22.15it/s]global step 600000, trans_decision ep_re 261.28909722966324

{"global_step": 600000, "eval_re": [875.1408703940863, 192.11580882863836, 
180.99628164680584, 135.82605468574866, 156.51450325271543, 188.6842615058258, 
198.00912799166923, 303.3175953240264, 231.83956926750312, 150.44689939961333], 
"eval_len": [158, 37, 35, 26, 30, 36, 38, 58, 44, 29]}

 61%|██████    | 609999/1000000 [10:16:20<4:52:01, 22.26it/s]global step 610000, trans_decision ep_re 240.94627702692168

{"global_step": 610000, "eval_re": [449.5608928097199, 136.46675342937408, 
141.4169217814032, 161.81190496344186, 369.4571328200388, 183.7912530795358, 
383.024372547995, 291.6211720619557, 156.07052458451435, 136.24184219123833], 
"eval_len": [81, 26, 27, 31, 70, 35, 73, 57, 30, 26]}

 62%|██████▏   | 619999/1000000 [10:26:30<4:47:29, 22.03it/s]global step 620000, trans_decision ep_re 333.5665079827283

{"global_step": 620000, "eval_re": [548.045103906241, 156.37372096616372, 
140.82493532145335, 348.3608778914785, 125.58641351168208, 921.8734775421059, 
356.2726955118309, 361.4192894782056, 230.32013508918354, 146.5884306089389], 
"eval_len": [100, 30, 27, 64, 24, 165, 67, 68, 44, 28]}

 63%|██████▎   | 629999/1000000 [10:36:40<4:39:15, 22.08it/s]global step 630000, trans_decision ep_re 355.97429965993354

{"global_step": 630000, "eval_re": [782.3388565224642, 146.17730225841447, 
130.58164475718613, 167.2783768891764, 207.91417063604837, 225.01439800217634, 
328.9014885062571, 907.5904880230887, 389.69940542395267, 274.24686558057084], 
"eval_len": [150, 28, 25, 32, 40, 43, 63, 168, 71, 54]}

 64%|██████▍   | 639999/1000000 [10:46:41<4:27:53, 22.40it/s]global step 640000, trans_decision ep_re 289.1716178256803

{"global_step": 640000, "eval_re": [176.42038271955582, 586.2999806851996, 
146.40583197158094, 260.3010364224878, 151.65003225787495, 161.88015682251947, 
183.94036127669284, 203.73031452063805, 833.647704745302, 187.44037683495102], 
"eval_len": [34, 103, 28, 51, 29, 31, 35, 39, 148, 36]}

 65%|██████▍   | 649997/1000000 [10:56:52<4:21:05, 22.34it/s]global step 650000, trans_decision ep_re 216.31240458150256

{"global_step": 650000, "eval_re": [166.7717820166838, 151.70806918094738, 
156.4519252371985, 161.4107233693031, 167.98619959518075, 140.95654403912184, 
150.919605995184, 212.9358325601496, 717.725801231421, 136.2575625898354], 
"eval_len": [32, 29, 30, 31, 32, 27, 29, 41, 121, 26]}

 66%|██████▌   | 659997/1000000 [11:07:02<4:16:20, 22.11it/s]global step 660000, trans_decision ep_re 261.54518459311475

{"global_step": 660000, "eval_re": [167.3667318087731, 219.53643498674649, 
295.99437770808305, 161.82442033591573, 338.77551723024925, 327.6662953173116, 
272.68482452214437, 345.8587372785798, 350.06585206593655, 135.6786546774074], 
"eval_len": [32, 42, 57, 31, 65, 63, 53, 67, 67, 26]}

 67%|██████▋   | 669999/1000000 [11:17:30<4:06:57, 22.27it/s]global step 670000, trans_decision ep_re 296.7102206270607

{"global_step": 670000, "eval_re": [340.9640915612826, 369.63816857969476, 
210.4922460076375, 172.66586597416753, 328.6411942871148, 305.2163010583334, 
320.6518387724266, 330.8177575738916, 301.4231545835863, 286.5915878724717], 
"eval_len": [66, 68, 40, 33, 63, 59, 62, 64, 58, 55]}

 68%|██████▊   | 679998/1000000 [11:27:40<3:58:48, 22.33it/s]global step 680000, trans_decision ep_re 224.66852811359678

{"global_step": 680000, "eval_re": [298.3238702606371, 376.2884399618465, 
192.88661980058885, 207.94660775258552, 189.20388838250634, 209.37692748304133, 
146.98114273216973, 146.4277705189948, 323.46975269719036, 155.78026154640767], 
"eval_len": [58, 71, 37, 40, 36, 40, 28, 28, 61, 30]}

 69%|██████▉   | 689998/1000000 [11:37:50<3:49:49, 22.48it/s]global step 690000, trans_decision ep_re 337.49459051952005

{"global_step": 690000, "eval_re": [135.65947437786554, 277.82037961704566, 
676.3703053682709, 275.8602683220155, 208.03337777270895, 338.1954327360121, 
135.94224943749884, 298.18697756349826, 883.1691841660245, 145.7082558342598], 
"eval_len": [26, 54, 141, 54, 40, 63, 26, 57, 163, 28]}

 70%|██████▉   | 699999/1000000 [11:48:00<3:44:43, 22.25it/s]global step 700000, trans_decision ep_re 215.52092231668948

{"global_step": 700000, "eval_re": [172.91731647355346, 410.60647201178534, 
356.60238441738005, 130.8400123044408, 331.2914243705691, 166.9844007913823, 
141.6859006202755, 166.70471835744834, 141.4241134516633, 136.1524803683963], 
"eval_len": [33, 74, 65, 25, 63, 32, 27, 32, 27, 26]}

 71%|███████   | 709997/1000000 [11:58:10<3:36:45, 22.30it/s]global step 710000, trans_decision ep_re 270.4904940580212

{"global_step": 710000, "eval_re": [171.58293967798375, 204.05530826245715, 
352.23820097294976, 695.884008078842, 141.00474793979933, 242.39397526245597, 
428.8865327493858, 161.79143912989758, 171.60559715191803, 135.4621913545229], 
"eval_len": [33, 39, 66, 139, 27, 46, 78, 31, 33, 26]}

 72%|███████▏  | 719997/1000000 [12:08:20<3:30:49, 22.14it/s]global step 720000, trans_decision ep_re 218.83504486375674

{"global_step": 720000, "eval_re": [130.9184816668024, 245.98678831394534, 
299.90049644149326, 125.26550056443267, 318.88961971026293, 220.68334239670585, 
146.01765472399507, 145.56536026726684, 362.91770697330514, 192.2054975793575], 
"eval_len": [25, 48, 58, 24, 62, 42, 28, 28, 68, 37]}

 73%|███████▎  | 729999/1000000 [12:18:30<3:24:33, 22.00it/s]global step 730000, trans_decision ep_re 257.5169837585779

{"global_step": 730000, "eval_re": [213.24099845228008, 176.29220740255877, 
219.8185707974422, 301.83333212675467, 738.2095086984809, 194.5493215779022, 
305.0680058819946, 120.23990486248638, 165.2162231334881, 140.70176465239092], 
"eval_len": [41, 34, 42, 58, 130, 37, 59, 23, 32, 27]}

 74%|███████▍  | 739999/1000000 [12:28:40<3:17:58, 21.89it/s]global step 740000, trans_decision ep_re 302.66615503157243

{"global_step": 740000, "eval_re": [135.53698778469425, 166.67674322780843, 
746.8326455343225, 357.47293969943183, 135.88659762138013, 396.2737563577914, 
447.05429086554926, 348.7055418282687, 135.6235191171749, 156.59852827930243], 
"eval_len": [26, 32, 142, 66, 26, 74, 81, 66, 26, 30]}

 75%|███████▍  | 749999/1000000 [12:38:50<3:09:19, 22.01it/s]global step 750000, trans_decision ep_re 223.87754940000522

{"global_step": 750000, "eval_re": [167.81133526350865, 182.1090027253125, 
367.45255865403925, 203.23139957802726, 281.5478717793488, 140.4641823573072, 
296.22114518831563, 219.5331193532724, 156.11946828673985, 224.28541081418047], 
"eval_len": [32, 35, 68, 39, 56, 27, 57, 42, 30, 43]}

 76%|███████▌  | 759999/1000000 [12:49:00<2:59:26, 22.29it/s]global step 760000, trans_decision ep_re 209.5049713974738

{"global_step": 760000, "eval_re": [130.6761279998721, 202.8585557245483, 
365.3892034646572, 125.48349489181723, 166.8290952104032, 183.05736851221417, 
135.72405085232032, 298.4518515536586, 345.28705624299516, 141.29290952225162], 
"eval_len": [25, 39, 68, 24, 32, 35, 26, 56, 66, 27]}

 77%|███████▋  | 769999/1000000 [12:59:10<2:52:37, 22.21it/s]global step 770000, trans_decision ep_re 203.91076769887093

{"global_step": 770000, "eval_re": [207.58536431759532, 198.53562315476654, 
161.2120993011988, 176.74348401616206, 135.81012074680655, 184.00094019479036, 
360.2996438302352, 307.0332438718206, 135.7013931698923, 172.1857643854415], 
"eval_len": [40, 38, 31, 34, 26, 35, 67, 60, 26, 33]}

 78%|███████▊  | 779999/1000000 [13:09:20<2:44:25, 22.30it/s]global step 780000, trans_decision ep_re 230.12360434679618

{"global_step": 780000, "eval_re": [312.07514653787325, 338.11465675799843, 
155.19126990141902, 323.2459445477001, 172.391373067309, 182.36404212933593, 
167.07978063408234, 145.5183179511497, 187.0621608064915, 318.1933511346025], 
"eval_len": [60, 64, 30, 62, 33, 35, 32, 28, 36, 62]}

 79%|███████▉  | 789999/1000000 [13:19:30<2:36:54, 22.31it/s]global step 790000, trans_decision ep_re 300.1115161757667

{"global_step": 790000, "eval_re": [588.5827159159558, 172.00322613755566, 
193.05986016273042, 171.8206596277974, 276.4689635688805, 151.57888516026125, 
569.757780115543, 141.36498547692102, 446.8592638960829, 289.6188216959394], 
"eval_len": [105, 33, 37, 33, 54, 29, 113, 27, 82, 56]}

 80%|███████▉  | 799999/1000000 [13:29:31<2:33:33, 21.71it/s]global step 800000, trans_decision ep_re 325.3355302629564

{"global_step": 800000, "eval_re": [166.75981882217758, 252.4374322799716, 
269.00789611202924, 333.26865847105086, 140.3510982699005, 350.22091245724084, 
171.58356015041818, 1146.6416364300087, 130.69640917363992, 292.3878804631267], 
"eval_len": [32, 48, 52, 63, 27, 65, 33, 211, 25, 57]}

 81%|████████  | 809997/1000000 [13:40:00<2:23:00, 22.14it/s]global step 810000, trans_decision ep_re 213.1950570711434

{"global_step": 810000, "eval_re": [167.91202130433274, 130.57616314936308, 
177.75004554845822, 335.9916818597868, 177.3127803766857, 412.6514597174216, 
187.80194813105126, 224.13801893812314, 182.25970404154646, 135.55674764466502],
"eval_len": [32, 25, 34, 62, 34, 75, 36, 43, 35, 26]}

 82%|████████▏ | 819999/1000000 [13:50:10<2:15:50, 22.09it/s]global step 820000, trans_decision ep_re 232.084719004643

{"global_step": 820000, "eval_re": [678.1446389427309, 171.824329209173, 
146.34864559211243, 156.97911202683062, 208.88215948399528, 157.4673941272553, 
146.7063611957118, 320.80030254709715, 171.51742174572826, 162.17682517579553], 
"eval_len": [129, 33, 28, 30, 40, 30, 28, 62, 33, 31]}

 83%|████████▎ | 829998/1000000 [14:00:20<2:04:51, 22.69it/s]global step 830000, trans_decision ep_re 299.57810877852125

{"global_step": 830000, "eval_re": [182.21404792891877, 162.53512818535592, 
331.49519648701914, 321.24086804052473, 240.5957358478476, 314.6615794157657, 
505.2040355164029, 208.38526153205262, 141.33740377012293, 588.1118310612023], 
"eval_len": [35, 31, 63, 62, 46, 62, 89, 40, 27, 105]}

 84%|████████▍ | 839998/1000000 [14:10:30<1:58:47, 22.45it/s]global step 840000, trans_decision ep_re 208.58609542857812

{"global_step": 840000, "eval_re": [367.1213766476415, 232.86181898704172, 
156.14816841161453, 177.76589594529793, 151.1142505705178, 136.18210814265873, 
389.83595476359017, 135.63761942728263, 151.51101717715164, 187.6827442129847], 
"eval_len": [70, 44, 30, 34, 29, 26, 74, 26, 29, 36]}

 85%|████████▍ | 849999/1000000 [14:20:40<1:52:39, 22.19it/s]global step 850000, trans_decision ep_re 300.6003102260568

{"global_step": 850000, "eval_re": [231.21185256518916, 141.28019610665834, 
177.88143049612583, 399.2195685576591, 491.8627062921526, 152.01257436278232, 
527.5797781111977, 288.9421576080215, 270.6138868577318, 325.39895130304967], 
"eval_len": [44, 27, 34, 73, 89, 29, 99, 56, 53, 63]}

 86%|████████▌ | 859999/1000000 [14:30:50<1:45:48, 22.05it/s]global step 860000, trans_decision ep_re 355.17247866832497

{"global_step": 860000, "eval_re": [176.95359442594957, 568.3424150331483, 
311.44732238800896, 145.86458795115763, 421.9178308535014, 614.5312843069884, 
558.7587673176918, 286.150590345682, 177.96753402670322, 289.79086003441785], 
"eval_len": [34, 109, 60, 28, 78, 127, 112, 56, 34, 56]}

 87%|████████▋ | 869999/1000000 [14:40:51<1:38:03, 22.10it/s]global step 870000, trans_decision ep_re 241.1365044554456

{"global_step": 870000, "eval_re": [226.02844366918677, 344.9753601078785, 
136.11672913380258, 172.4418649397475, 136.02420453706932, 309.8856019372916, 
157.4299280907614, 322.13389721149383, 316.08343600795376, 290.2455789192704], 
"eval_len": [43, 65, 26, 33, 26, 60, 30, 61, 60, 58]}

 88%|████████▊ | 879997/1000000 [14:51:20<1:30:43, 22.04it/s]global step 880000, trans_decision ep_re 232.11859659189068

{"global_step": 880000, "eval_re": [454.93750872140095, 156.6687713454922, 
197.7724552847601, 209.17669169544584, 203.38332426304936, 141.50740498677595, 
327.5521353984407, 160.78621055815813, 141.388430803104, 328.0130328622798], 
"eval_len": [84, 30, 38, 40, 39, 27, 63, 31, 27, 62]}

 89%|████████▉ | 889998/1000000 [15:01:30<1:22:28, 22.23it/s]global step 890000, trans_decision ep_re 233.74448668406657

{"global_step": 890000, "eval_re": [291.0209692949477, 225.007571554853, 
130.78951051335142, 338.48929309630375, 183.57892399427868, 225.29825091118553, 
156.180145238787, 292.28029945479386, 295.4932660481487, 199.3066367340159], 
"eval_len": [56, 43, 25, 64, 35, 43, 30, 55, 58, 38]}

 90%|████████▉ | 899999/1000000 [15:11:40<1:15:37, 22.04it/s]global step 900000, trans_decision ep_re 259.418783951509

{"global_step": 900000, "eval_re": [141.41612508527703, 329.25952091479104, 
358.3916767967969, 227.2543470214239, 289.0361356548476, 151.88352877261454, 
320.3526819915682, 220.8476512790834, 284.0512996134103, 271.694872385277], 
"eval_len": [27, 61, 67, 43, 57, 29, 59, 42, 55, 53]}

 91%|█████████ | 909999/1000000 [15:21:42<1:08:12, 21.99it/s]global step 910000, trans_decision ep_re 220.86176506621177

{"global_step": 910000, "eval_re": [301.7337192412913, 146.97232921866353, 
178.33610149206396, 283.58531219974026, 250.91662356443945, 146.79554129975722, 
319.3630367342083, 245.71104770678897, 142.16423326256358, 193.03970594260124], 
"eval_len": [58, 28, 34, 55, 49, 28, 61, 48, 27, 37]}

 92%|█████████▏| 919999/1000000 [15:32:10<1:00:06, 22.18it/s]global step 920000, trans_decision ep_re 231.48050167154187

{"global_step": 920000, "eval_re": [336.63915994848736, 272.1750459447294, 
173.46002211102893, 356.1669964338704, 329.8283320769327, 145.6179245008383, 
151.9228455089257, 276.7158376375814, 125.65705854148304, 146.62179401154182], 
"eval_len": [64, 51, 33, 67, 63, 28, 29, 54, 24, 28]}

 93%|█████████▎| 929998/1000000 [15:42:31<52:19, 22.29it/s]global step 930000, trans_decision ep_re 296.8389314946883

{"global_step": 930000, "eval_re": [356.6898646466575, 302.30587766948247, 
232.23111220788005, 291.17404978294127, 219.21981675180984, 156.4963639080993, 
351.91796465565176, 151.67074921253194, 289.5870035410708, 617.096512570758], 
"eval_len": [70, 58, 44, 56, 42, 30, 66, 29, 56, 114]}

 94%|█████████▍| 939999/1000000 [15:52:41<45:08, 22.16it/s]global step 940000, trans_decision ep_re 270.7064225207854

{"global_step": 940000, "eval_re": [289.54615614209996, 173.93102172016694, 
303.5628767130658, 298.139282769934, 374.9931885826167, 351.61441922021925, 
167.0678901814173, 220.68422469415646, 201.51746608543326, 326.00769909874475], 
"eval_len": [56, 33, 58, 58, 69, 66, 32, 42, 39, 61]}

 95%|█████████▍| 949998/1000000 [16:02:42<37:40, 22.12it/s]global step 950000, trans_decision ep_re 237.01147512838725

{"global_step": 950000, "eval_re": [184.17994422944358, 135.34614214769778, 
279.7668636837816, 287.54037958851086, 193.68308870699565, 352.3688724613889, 
161.38455961556107, 345.4884276347041, 299.56877677540234, 130.7876964403864], 
"eval_len": [35, 26, 54, 54, 37, 66, 31, 65, 57, 25]}

 96%|█████████▌| 959999/1000000 [16:13:11<30:03, 22.18it/s]global step 960000, trans_decision ep_re 237.11188936428394

{"global_step": 960000, "eval_re": [172.88248048071333, 178.2834575538082, 
168.65739760739697, 344.5064787879497, 182.9579395879784, 597.5161587661895, 
135.85400017701141, 156.61052455059436, 130.9144342872934, 302.93602184390414], 
"eval_len": [33, 34, 32, 64, 35, 107, 26, 30, 25, 58]}

 97%|█████████▋| 969999/1000000 [16:23:21<22:37, 22.10it/s]global step 970000, trans_decision ep_re 232.04307048815562

{"global_step": 970000, "eval_re": [150.42301506069157, 150.8413048752257, 
300.32672803621625, 403.38371661055214, 182.84228514589142, 172.0483046744294, 
135.92287050312558, 196.96359032871194, 283.9937344278958, 343.68515521881636], 
"eval_len": [29, 29, 57, 75, 35, 33, 26, 38, 55, 66]}

 98%|█████████▊| 979999/1000000 [16:33:31<15:03, 22.13it/s]global step 980000, trans_decision ep_re 192.1646445436513

{"global_step": 980000, "eval_re": [135.49113674360333, 192.66945707288116, 
151.23563059711844, 273.1569688363582, 134.94318077690235, 135.70387175617822, 
406.89275532079324, 204.18714437245256, 130.49605666881092, 156.87024329141443],
"eval_len": [26, 37, 29, 53, 26, 26, 76, 39, 25, 30]}

 99%|█████████▉| 989998/1000000 [16:43:51<07:29, 22.23it/s]global step 990000, trans_decision ep_re 363.5393176491722

{"global_step": 990000, "eval_re": [306.63900520243465, 697.2001434074388, 
304.9590735561429, 297.4406628789468, 291.046283392586, 171.87287350373396, 
342.6865429067625, 324.7415684010394, 177.90039245438368, 720.906630788253], 
"eval_len": [59, 123, 58, 57, 56, 33, 64, 61, 34, 147]}

100%|█████████▉| 999998/1000000 [16:54:01<00:00, 22.18it/s]global step 1000000, trans_decision ep_re 250.9788582169762

{"global_step": 1000000, "eval_re": [136.59913563334254, 214.53382977966072, 
178.10747353535027, 314.36062569079326, 156.305185406848, 145.9992030716015, 
350.4206302809621, 442.0699189477513, 345.78671295556046, 225.60586686789142], 
"eval_len": [26, 41, 34, 60, 30, 28, 66, 81, 67, 43]}

100%|██████████| 1000000/1000000 [16:54:04<00:00, 16.44it/s]
