
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.05
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [04:30<10:42:42, 25.67it/s]global step 10000, trans_decision ep_re 111.81054168096607

{"global_step": 10000, "eval_re": [180.03744146515638, 92.19606421076757, 
192.37425367703017, 74.3344019916702, 98.14441031818497, 25.429779178547435, 
173.9583829160628, 92.19222037080961, 75.3867806935158, 114.05168198791581], 
"eval_len": [105, 67, 111, 59, 65, 26, 98, 73, 60, 78]}

  2%|▏         | 19999/1000000 [13:10<10:37:56, 25.60it/s]global step 20000, trans_decision ep_re 113.27366518235556

{"global_step": 20000, "eval_re": [209.40751461887444, 26.61039054404588, 
121.19334426859349, 117.7691343314677, 141.2098970147194, 73.01935759586017, 
105.41506461373878, 113.7783325523461, 27.185814334425203, 197.1478019494844], 
"eval_len": [149, 29, 84, 81, 88, 58, 77, 78, 30, 122]}

  3%|▎         | 29997/1000000 [21:34<10:27:50, 25.75it/s]global step 30000, trans_decision ep_re 71.55232231883278

{"global_step": 30000, "eval_re": [70.80439548329309, 90.92433883775624, 
70.79872701222024, 25.092928220372077, 124.14434234694716, 67.6433063102595, 
26.135046065395468, 84.91859311293646, 129.77329329295193, 25.28825250619557], 
"eval_len": [60, 66, 49, 28, 95, 57, 29, 62, 90, 29]}

  4%|▍         | 39998/1000000 [30:20<10:17:55, 25.89it/s]global step 40000, trans_decision ep_re 88.6190609073706

{"global_step": 40000, "eval_re": [207.97674014758002, 29.059628855461796, 
26.91148492915599, 72.24863369926422, 130.640405745027, 148.50773890157137, 
127.22648569780378, 48.18605265695004, 25.29457863955359, 70.1388598013381], 
"eval_len": [169, 31, 31, 62, 103, 123, 100, 46, 28, 61]}

  5%|▍         | 49997/1000000 [39:00<10:19:19, 25.57it/s]global step 50000, trans_decision ep_re 161.17017393277635

{"global_step": 50000, "eval_re": [244.4685436532104, 182.01936994508657, 
89.79164321976656, 174.35693190452224, 145.55046681092549, 147.2643034801042, 
152.3263625433824, 177.15407972745038, 161.23335497176808, 137.53668307154754], 
"eval_len": [218, 157, 68, 150, 112, 118, 123, 146, 122, 111]}

  6%|▌         | 59999/1000000 [47:20<10:06:11, 25.84it/s]global step 60000, trans_decision ep_re 152.74994296822717

{"global_step": 60000, "eval_re": [100.10064245795485, 106.45965897805115, 
251.88704927994786, 235.9922522943774, 100.77105905939175, 101.63640759901706, 
166.19977809741334, 96.1278251021901, 165.36431262454457, 202.960444189384], 
"eval_len": [72, 80, 145, 138, 74, 79, 102, 76, 95, 116]}

  7%|▋         | 69999/1000000 [55:54<9:56:24, 25.99it/s]global step 70000, trans_decision ep_re 127.0220768467029

{"global_step": 70000, "eval_re": [121.0209688383735, 276.30916147575493, 
104.08708838904451, 127.3177227656131, 105.4196582325426, 119.903045111802, 
102.21010401642542, 94.68567711807833, 84.11039298481639, 135.1569495345781], 
"eval_len": [79, 149, 73, 80, 75, 79, 74, 77, 63, 89]}

  8%|▊         | 79998/1000000 [1:04:40<9:45:14, 26.20it/s]global step 80000, trans_decision ep_re 203.37304683259381

{"global_step": 80000, "eval_re": [209.03635230376292, 381.3576566590376, 
138.3513674249365, 192.7811165766334, 24.94046595002402, 246.32576948214987, 
136.51016384333633, 340.01588833368527, 27.53906438032748, 336.87262337204476], 
"eval_len": [126, 188, 90, 112, 32, 139, 89, 162, 32, 160]}

  9%|▉         | 89999/1000000 [1:13:20<9:44:23, 25.95it/s]global step 90000, trans_decision ep_re 219.97093323577297

{"global_step": 90000, "eval_re": [95.22582988388953, 475.4420077118058, 
189.01880097192245, 278.4862112436679, 171.249049262969, 142.93314848035024, 
280.683188721916, 150.29283776162933, 151.32949793655868, 265.0487603830207], 
"eval_len": [77, 211, 116, 147, 103, 94, 147, 99, 96, 143]}

 10%|▉         | 99999/1000000 [1:21:50<9:40:57, 25.82it/s]global step 100000, trans_decision ep_re 237.677859105277

{"global_step": 100000, "eval_re": [357.88150410559405, 151.12058159317993, 
183.77801607609928, 456.70684685642544, 132.09984336250852, 177.51473128258672, 
258.5141646601988, 168.28924516807973, 312.0524923821429, 178.8211655659549], 
"eval_len": [164, 97, 114, 222, 79, 108, 136, 99, 164, 108]}

 11%|█         | 109998/1000000 [1:30:30<9:27:09, 26.15it/s]global step 110000, trans_decision ep_re 240.61025753458733

{"global_step": 110000, "eval_re": [436.918103555541, 270.2516755990197, 
130.81880782945197, 154.27645610382788, 210.54527538376098, 318.3031883670501, 
316.5401922991054, 167.36566811457232, 160.96977887157772, 240.1134292219661], 
"eval_len": [190, 143, 90, 97, 111, 161, 163, 102, 102, 122]}

 12%|█▏        | 119999/1000000 [1:38:50<9:32:13, 25.63it/s]global step 120000, trans_decision ep_re 181.3902184733642

{"global_step": 120000, "eval_re": [144.18952380533227, 253.0737223950328, 
340.69065551869363, 142.09990098680262, 145.96635570774964, 21.28799529867426, 
129.5613242714703, 185.6442050742787, 224.2704200078578, 227.11808166774972], 
"eval_len": [86, 132, 183, 92, 95, 25, 83, 110, 121, 122]}

 13%|█▎        | 129998/1000000 [1:47:40<9:18:41, 25.95it/s]global step 130000, trans_decision ep_re 191.90520138055575

{"global_step": 130000, "eval_re": [158.72502610381898, 303.4980458470764, 
147.01926846307424, 166.80978914438518, 160.25496737035394, 148.93606659353514, 
189.67632516840976, 340.4585682295439, 146.9574518302314, 156.71650505512847], 
"eval_len": [97, 154, 90, 100, 85, 90, 111, 175, 88, 100]}

 14%|█▍        | 139999/1000000 [1:56:10<9:16:40, 25.75it/s]global step 140000, trans_decision ep_re 142.1306094972796

{"global_step": 140000, "eval_re": [362.7042985737907, 117.73812920035907, 
49.30675847867502, 64.14220331419826, 47.960335767082086, 206.53260634652787, 
325.0129005782455, 54.500703327824155, 144.03838036432685, 49.36977902176635], 
"eval_len": [167, 79, 47, 59, 48, 113, 163, 53, 88, 47]}

 15%|█▍        | 149999/1000000 [2:04:34<9:11:14, 25.70it/s]global step 150000, trans_decision ep_re 139.6320203764837

{"global_step": 150000, "eval_re": [243.22276588363044, 62.90075551360665, 
61.874271018720066, 217.02278565167657, 371.9017378255931, 140.352054379627, 
64.82154377483862, 42.62973719285523, 31.99444575232125, 159.60010677196811], 
"eval_len": [118, 57, 58, 118, 167, 92, 59, 47, 31, 94]}

 16%|█▌        | 159999/1000000 [2:13:11<9:07:33, 25.57it/s]global step 160000, trans_decision ep_re 67.90043300719384

{"global_step": 160000, "eval_re": [47.918214477360465, 75.8866276983476, 
43.38854008824274, 41.91360671038812, 89.28870112895954, 73.63899519625443, 
91.80029000329257, 75.73166136560233, 51.29130470279887, 88.14638870069182], 
"eval_len": [48, 65, 43, 45, 72, 62, 76, 63, 50, 78]}

 17%|█▋        | 169997/1000000 [2:22:00<8:57:52, 25.72it/s]global step 170000, trans_decision ep_re 89.37488930238428

{"global_step": 170000, "eval_re": [58.405818055431695, 34.660448234080846, 
47.45081868929376, 54.70333061674385, 42.87319611337488, 164.04063520195731, 
70.16798620175524, 301.4389970694023, 60.61760318853895, 59.39005965326375], 
"eval_len": [53, 40, 48, 54, 47, 98, 60, 148, 55, 55]}

 18%|█▊        | 179997/1000000 [2:30:40<8:53:01, 25.64it/s]global step 180000, trans_decision ep_re 225.73930011900194

{"global_step": 180000, "eval_re": [48.0728397359884, 392.88234089709067, 
187.78858582409762, 134.24974856181552, 46.82931453068652, 310.90013901942945, 
396.57407903595714, 198.6782614219761, 299.66631463590767, 241.75137752707025], 
"eval_len": [49, 192, 106, 81, 49, 158, 187, 112, 154, 134]}

 19%|█▉        | 189998/1000000 [2:39:01<8:35:41, 26.18it/s]global step 190000, trans_decision ep_re 257.9014609578563

{"global_step": 190000, "eval_re": [423.90791315641275, 258.49193341593946, 
165.69706368368747, 450.8275419549475, 393.4384946759874, 159.69806161009174, 
52.10570898172461, 25.480017121150162, 215.3358631012333, 434.0320118773886], 
"eval_len": [202, 143, 97, 219, 195, 92, 51, 33, 128, 201]}

 20%|█▉        | 199999/1000000 [2:47:50<8:36:25, 25.82it/s]global step 200000, trans_decision ep_re 310.846803774455

{"global_step": 200000, "eval_re": [188.4768249568999, 724.038223832612, 
201.25695606018203, 161.06217131426527, 426.8602723163039, 67.76382427710678, 
489.8373914575977, 41.36304990756187, 295.43488901973694, 512.3744346022837], 
"eval_len": [105, 272, 106, 93, 175, 69, 211, 41, 138, 234]}

 21%|██        | 209998/1000000 [2:56:30<8:32:08, 25.71it/s]global step 210000, trans_decision ep_re 267.61046503068957

{"global_step": 210000, "eval_re": [31.86967802456458, 563.9373446399547, 
594.3339211620287, 165.15213811386081, 272.26128929698, 273.1948308531393, 
141.96762122147442, 302.9306884188396, 190.5299401300076, 139.9271984460459], 
"eval_len": [36, 232, 227, 100, 132, 134, 88, 148, 103, 85]}

 22%|██▏       | 219998/1000000 [3:05:10<8:27:36, 25.61it/s]global step 220000, trans_decision ep_re 122.13525234706415

{"global_step": 220000, "eval_re": [47.11150443062808, 65.34051046311966, 
19.43984363738594, 638.0036660625012, 100.72784653263258, 109.71534110100147, 
37.47481784849382, 46.191533910181604, 73.75700696006574, 83.59045252463129], 
"eval_len": [50, 59, 23, 236, 73, 81, 42, 50, 62, 69]}

 23%|██▎       | 229999/1000000 [3:13:30<8:15:00, 25.93it/s]global step 230000, trans_decision ep_re 479.5827310917863

{"global_step": 230000, "eval_re": [167.53537172769947, 724.9786607956806, 
294.0097590432681, 454.62792703121374, 471.89838318500705, 271.729689129826, 
215.74707828457898, 1238.841427059426, 652.9642790929372, 303.4947355682248], 
"eval_len": [99, 269, 138, 178, 194, 125, 114, 421, 233, 141]}

 24%|██▍       | 239999/1000000 [3:22:20<8:08:10, 25.95it/s]global step 240000, trans_decision ep_re 228.81855093653218

{"global_step": 240000, "eval_re": [177.5829132032921, 218.56454442113852, 
209.1140989804255, 435.8975482388265, 82.21122136042983, 231.04634511124715, 
423.104980745738, 72.74202204633899, 307.2922266298854, 130.62960862799963], 
"eval_len": [113, 120, 113, 199, 73, 124, 196, 68, 158, 82]}

 25%|██▍       | 249997/1000000 [3:30:42<8:05:26, 25.75it/s]global step 250000, trans_decision ep_re 64.21456247260231

{"global_step": 250000, "eval_re": [183.2888071994709, 54.57525555554049, 
75.97772458125554, 56.225229481352535, 47.383579716336435, 51.615159664749115, 
38.15914851740849, 38.49851480307995, 45.83885829917372, 50.583346907656015], 
"eval_len": [99, 54, 68, 57, 46, 50, 42, 42, 46, 50]}

 26%|██▌       | 259998/1000000 [3:39:30<7:52:34, 26.10it/s]global step 260000, trans_decision ep_re 280.4991869360259

{"global_step": 260000, "eval_re": [357.68295237016673, 464.3965031099902, 
205.379439620071, 443.2365382540848, 599.6837584166722, 292.84137537588373, 
55.75390972555421, 40.18580843104138, 277.609395532322, 68.22218852447277], 
"eval_len": [155, 171, 108, 170, 214, 128, 55, 46, 128, 64]}

 27%|██▋       | 269999/1000000 [3:47:51<7:51:42, 25.79it/s]global step 270000, trans_decision ep_re 338.86048430854447

{"global_step": 270000, "eval_re": [711.5558724359342, 352.2817893399898, 
596.6878609239297, 154.78343788985404, 210.97506572770968, 66.25358671197249, 
431.61820153977, 201.57608312690812, 322.5727860447478, 340.3001593446291], 
"eval_len": [239, 169, 220, 92, 109, 61, 179, 102, 137, 159]}

 28%|██▊       | 279999/1000000 [3:56:40<7:45:04, 25.80it/s]global step 280000, trans_decision ep_re 69.76084840763058

{"global_step": 280000, "eval_re": [52.779479321753655, 33.77538966967933, 
363.2512230000096, 41.97532853873019, 26.129273634411053, 32.15691853202459, 
41.45482364015696, 37.74286946878749, 29.39289386254592, 38.95028440820695], 
"eval_len": [49, 39, 164, 44, 37, 39, 44, 40, 34, 42]}

 29%|██▉       | 289999/1000000 [4:05:01<7:45:32, 25.42it/s]global step 290000, trans_decision ep_re 128.24194359481118

{"global_step": 290000, "eval_re": [239.37884592846186, 21.309638645100822, 
127.36105183922541, 117.31160387398657, 218.35388123354124, 61.344133807714016, 
119.92208716572709, 199.70433218041853, 59.365693211856126, 118.36816806207995],
"eval_len": [116, 25, 87, 86, 115, 59, 82, 101, 53, 81]}

 30%|██▉       | 299998/1000000 [4:13:50<7:27:52, 26.05it/s]global step 300000, trans_decision ep_re 128.91781749243557

{"global_step": 300000, "eval_re": [153.40375809627747, 196.03141313802098, 
78.35474943711863, 90.94156863670604, 32.40288243927167, 316.913855427611, 
249.05913296983223, 35.78747887850921, 45.26093017923595, 91.02240572177271], 
"eval_len": [92, 106, 65, 73, 35, 146, 116, 39, 47, 76]}

 31%|███       | 309999/1000000 [4:22:20<7:25:58, 25.79it/s]global step 310000, trans_decision ep_re 212.3534031719354

{"global_step": 310000, "eval_re": [202.8638070618238, 238.99840546230874, 
91.1084778270389, 53.888589670163185, 254.54461844625033, 218.10493433362367, 
270.5749452443719, 236.3774308874602, 352.365767117642, 204.70705566867147], 
"eval_len": [104, 111, 76, 53, 122, 112, 131, 120, 156, 114]}

 32%|███▏      | 319999/1000000 [4:31:00<7:18:33, 25.84it/s]global step 320000, trans_decision ep_re 290.1176182209683

{"global_step": 320000, "eval_re": [67.18219397375725, 198.6170398970254, 
85.54456524196978, 298.3352449945321, 174.7658329435924, 508.68471442199717, 
87.6831227876813, 223.44601303973016, 741.548288288376, 515.3691666210215], 
"eval_len": [65, 101, 72, 137, 108, 190, 71, 122, 265, 207]}

 33%|███▎      | 329999/1000000 [4:39:20<7:09:51, 25.98it/s]global step 330000, trans_decision ep_re 423.1340253915355

{"global_step": 330000, "eval_re": [722.4527020491313, 72.85776507077551, 
210.12346033532296, 845.8041873808961, 52.10622491846416, 833.180035969332, 
747.8297389715427, 342.2445404965102, 186.10052197720566, 218.64107674617475], 
"eval_len": [263, 62, 113, 274, 53, 295, 250, 148, 93, 114]}

 34%|███▍      | 339999/1000000 [4:48:10<7:06:16, 25.80it/s]global step 340000, trans_decision ep_re 326.86046470183794

{"global_step": 340000, "eval_re": [360.75662650846397, 149.60870424620683, 
285.6597758011661, 235.0666617828847, 585.5720125731648, 448.6352493090067, 
96.50465652449451, 454.21223262678683, 201.4852939860055, 451.1034336601994], 
"eval_len": [166, 93, 133, 112, 231, 179, 81, 182, 111, 185]}

 35%|███▍      | 349999/1000000 [4:56:50<7:01:03, 25.73it/s]global step 350000, trans_decision ep_re 215.3878589561012

{"global_step": 350000, "eval_re": [38.40980331818398, 1128.271562921878, 
64.44979235468863, 272.03852325848896, 84.71790683619749, 52.455470477029436, 
51.41967531449769, 202.3690276295769, 42.28793076847736, 217.45889668199396], 
"eval_len": [42, 374, 58, 129, 67, 48, 50, 108, 41, 112]}

 36%|███▌      | 359999/1000000 [5:05:20<6:50:43, 25.97it/s]global step 360000, trans_decision ep_re 337.39298492502076

{"global_step": 360000, "eval_re": [18.223790984652886, 229.27826597168908, 
453.91501193587726, 202.74751574491646, 203.77785937532397, 250.28483516783723, 
468.3208241041893, 121.93705143962552, 464.79454410314395, 960.6501504229517], 
"eval_len": [23, 117, 184, 106, 110, 137, 190, 84, 186, 332]}

 37%|███▋      | 369997/1000000 [5:14:00<6:49:54, 25.62it/s]global step 370000, trans_decision ep_re 195.87050206536452

{"global_step": 370000, "eval_re": [279.41387532777463, 87.02814838837429, 
68.99503105262747, 325.7741488250008, 57.854764557556415, 200.35661058395218, 
226.08319127547662, 438.1038749415257, 71.12393637874136, 203.9714393226159], 
"eval_len": [133, 75, 63, 149, 54, 107, 113, 177, 64, 106]}

 38%|███▊      | 379997/1000000 [5:22:22<6:45:36, 25.48it/s]global step 380000, trans_decision ep_re 308.1125620953145

{"global_step": 380000, "eval_re": [404.9456133298159, 787.8359200857325, 
73.66094527283843, 176.80454755337942, 459.0502281600609, 46.05248955602788, 
183.76691435434049, 576.035399151989, 191.5669739613544, 181.40658952760586], 
"eval_len": [172, 276, 68, 97, 188, 48, 101, 211, 101, 95]}

 39%|███▉      | 389999/1000000 [5:31:01<6:31:16, 25.98it/s]global step 390000, trans_decision ep_re 351.3144742818111

{"global_step": 390000, "eval_re": [202.98954106127167, 123.8549471098386, 
168.7978782345339, 455.2124806140176, 800.6641312093675, 215.51909422177513, 
196.91361794843684, 587.1566839107944, 303.6427262003778, 458.3936423076979], 
"eval_len": [112, 88, 90, 179, 295, 116, 112, 208, 142, 178]}

 40%|███▉      | 399999/1000000 [5:39:40<6:29:07, 25.70it/s]global step 400000, trans_decision ep_re 254.25794174369358

{"global_step": 400000, "eval_re": [479.79671153463704, 226.94315384036082, 
1058.5367782886954, 39.289123717832986, 78.64416706864638, 72.12322388182116, 
226.4259470640005, 245.47525464808123, 40.5655034840268, 74.77955390883345], 
"eval_len": [190, 108, 323, 41, 67, 60, 116, 123, 44, 67]}

 41%|████      | 409999/1000000 [5:48:30<6:20:44, 25.83it/s]global step 410000, trans_decision ep_re 246.55805273742467

{"global_step": 410000, "eval_re": [200.34143179161651, 363.320820102796, 
167.42592891133685, 406.5868480163747, 304.3304738191178, 141.97655076490642, 
42.77174386174111, 175.3639230371385, 629.8296422483521, 33.63316482086648], 
"eval_len": [98, 146, 92, 148, 132, 82, 43, 92, 212, 36]}

 42%|████▏     | 419997/1000000 [5:57:10<6:14:34, 25.81it/s]global step 420000, trans_decision ep_re 255.97374769009815

{"global_step": 420000, "eval_re": [88.25134710027643, 899.5165022174399, 
521.180682925205, 39.06373591214655, 32.84775247569637, 39.793730311163785, 
152.67228405718768, 77.56250079081575, 67.09321166809839, 641.755729442952], 
"eval_len": [69, 307, 210, 40, 38, 39, 93, 64, 61, 236]}

 43%|████▎     | 429998/1000000 [6:05:31<6:02:47, 26.19it/s]global step 430000, trans_decision ep_re 219.75568384729485

{"global_step": 430000, "eval_re": [50.245575882250506, 183.36338310934204, 
201.0084722962795, 294.8863594599886, 72.7916219261112, 179.16630898027674, 
125.6223664414977, 124.3087453165536, 770.5801711325789, 195.58383392806957], 
"eval_len": [51, 98, 107, 133, 62, 99, 83, 82, 271, 103]}

 44%|████▍     | 439997/1000000 [6:14:20<6:05:36, 25.53it/s]global step 440000, trans_decision ep_re 266.5048109549008

{"global_step": 440000, "eval_re": [178.560450084434, 672.9338575654493, 
217.92683728151954, 200.43572468378727, 200.18567935325044, 69.2612397174404, 
375.6139100729696, 49.5145102505125, 486.70423267123954, 213.91166786840552], 
"eval_len": [99, 245, 111, 99, 105, 60, 161, 50, 192, 110]}

 45%|████▍     | 449998/1000000 [6:23:00<5:51:15, 26.10it/s]global step 450000, trans_decision ep_re 206.28145793294516

{"global_step": 450000, "eval_re": [381.7583853166229, 550.9614323018276, 
358.78447559348314, 34.7462778917416, 64.3714545408252, 229.6913305590248, 
61.72324971498355, 31.339539971523873, 30.723182284286327, 318.7152511551327], 
"eval_len": [157, 201, 148, 40, 59, 119, 55, 33, 34, 133]}

 46%|████▌     | 459999/1000000 [6:31:21<5:47:40, 25.89it/s]global step 460000, trans_decision ep_re 236.06208147027388

{"global_step": 460000, "eval_re": [92.90943830176963, 488.5507214726002, 
850.2219978760895, 24.88686044948321, 70.5231369457674, 82.0590201796881, 
232.23519128853093, 229.90419877266123, 28.272502623765583, 261.057746792383], 
"eval_len": [83, 193, 303, 29, 64, 73, 115, 120, 37, 120]}

 47%|████▋     | 469997/1000000 [6:40:10<5:43:41, 25.70it/s]global step 470000, trans_decision ep_re 140.60143703873493

{"global_step": 470000, "eval_re": [29.12699386328165, 513.9340801270318, 
213.21797128793906, 32.399864534996965, 29.112688467005707, 62.164756665509785, 
28.809782481987952, 403.76101257906276, 24.136380667169682, 69.35083971336394], 
"eval_len": [32, 198, 116, 36, 33, 57, 29, 169, 26, 62]}

 48%|████▊     | 479999/1000000 [6:48:31<5:38:46, 25.58it/s]global step 480000, trans_decision ep_re 353.0869806256596

{"global_step": 480000, "eval_re": [378.79769236114345, 384.94897640221586, 
392.58390170605537, 188.0985051500118, 42.63040274745434, 537.506698649996, 
483.9529483521123, 500.70463518869065, 122.28276926069366, 499.3632764382222], 
"eval_len": [155, 159, 164, 106, 43, 210, 190, 191, 83, 193]}

 49%|████▉     | 489997/1000000 [6:57:20<5:33:37, 25.48it/s]global step 490000, trans_decision ep_re 85.16520256407782

{"global_step": 490000, "eval_re": [33.88608505308093, 29.13492378457711, 
84.85018893708433, 47.51310841806061, 37.409623349263185, 35.966755106405124, 
42.34417767406309, 49.93422880530459, 35.3011001322584, 455.31183438068075], 
"eval_len": [38, 38, 71, 46, 40, 40, 41, 45, 41, 177]}

 50%|████▉     | 499999/1000000 [7:05:41<5:21:35, 25.91it/s]global step 500000, trans_decision ep_re 49.61410836402945

{"global_step": 500000, "eval_re": [58.678703485772935, 88.41948483563071, 
33.8078269408039, 32.055053007437714, 27.53486083656802, 32.98305775863509, 
49.770027828863405, 82.82086064974976, 66.21058495131211, 23.860623345520768], 
"eval_len": [56, 77, 36, 32, 29, 36, 47, 69, 62, 28]}

 51%|█████     | 509999/1000000 [7:14:30<5:15:26, 25.89it/s]global step 510000, trans_decision ep_re 220.24339308749404

{"global_step": 510000, "eval_re": [32.2316275157125, 40.34167050800112, 
365.1260227168403, 33.12945387029866, 275.2523626330078, 279.34041612972715, 
333.55957614843413, 38.70328036974379, 769.7969676558632, 34.95255332731197], 
"eval_len": [37, 42, 161, 38, 124, 126, 140, 42, 273, 38]}

 52%|█████▏    | 519999/1000000 [7:23:00<5:10:56, 25.73it/s]global step 520000, trans_decision ep_re 204.86126569976756

{"global_step": 520000, "eval_re": [49.73291687452352, 31.77942203643028, 
517.4319204992021, 67.78856833470455, 358.64732755785496, 231.6249297003217, 
32.07442698890722, 422.4045191835862, 290.82304267758326, 46.305583144561325], 
"eval_len": [47, 37, 198, 61, 152, 117, 34, 172, 132, 45]}

 53%|█████▎    | 529999/1000000 [7:31:40<5:04:18, 25.74it/s]global step 530000, trans_decision ep_re 240.27681160564316

{"global_step": 530000, "eval_re": [325.7990649279746, 325.30257419191435, 
56.527046454190234, 262.4370595324341, 254.17281840894728, 34.189727871341496, 
36.235119499867935, 80.48335105831686, 32.39877990494475, 995.2225742065002], 
"eval_len": [145, 143, 56, 121, 120, 38, 36, 67, 35, 323]}

 54%|█████▍    | 539999/1000000 [7:40:10<4:57:32, 25.77it/s]global step 540000, trans_decision ep_re 183.089861420918

{"global_step": 540000, "eval_re": [31.451687252136995, 296.3496184614484, 
200.50322465754084, 544.1882455242483, 160.03007313115842, 38.115058298750256, 
70.75981071709344, 221.1128152054301, 228.7019359734073, 39.686144987965804], 
"eval_len": [37, 131, 109, 223, 101, 40, 65, 121, 114, 40]}

 55%|█████▍    | 549998/1000000 [7:48:50<4:45:28, 26.27it/s]global step 550000, trans_decision ep_re 311.15791559215893

{"global_step": 550000, "eval_re": [21.82722882143195, 370.7493553038501, 
877.7613322108595, 456.2883281906722, 31.55386740337315, 375.2258833646877, 
210.18445828240806, 257.9213420335214, 471.5094000130956, 38.5579602976891], 
"eval_len": [23, 160, 297, 175, 38, 150, 103, 123, 182, 43]}

 56%|█████▌    | 559999/1000000 [7:57:10<4:43:19, 25.88it/s]global step 560000, trans_decision ep_re 270.5098459146852

{"global_step": 560000, "eval_re": [183.77497013324623, 548.4313875521591, 
256.37830766203075, 452.21792213134904, 196.1940577699294, 39.16399586195178, 
401.68112621774736, 251.10996681603012, 224.43458735407077, 151.7121376483377], 
"eval_len": [94, 203, 118, 172, 101, 42, 163, 121, 112, 90]}

 57%|█████▋    | 569997/1000000 [8:06:00<4:38:14, 25.76it/s]global step 570000, trans_decision ep_re 112.45593172291922

{"global_step": 570000, "eval_re": [227.38135211514341, 68.53034973882245, 
183.58173613009836, 102.99702247420402, 175.50819180423204, 65.07414276358895, 
28.458479474419057, 33.31395522806214, 170.40725664133183, 69.30683085928992], 
"eval_len": [108, 59, 105, 76, 97, 59, 34, 35, 93, 59]}

 58%|█████▊    | 579999/1000000 [8:14:21<4:31:09, 25.82it/s]global step 580000, trans_decision ep_re 279.0361795187197

{"global_step": 580000, "eval_re": [192.44613771777352, 49.584261222601974, 
274.5685746913409, 355.94283056670497, 751.6148736337497, 44.53016374093394, 
347.25645018189556, 205.70138953312346, 320.4978185338098, 248.2192953652632], 
"eval_len": [101, 51, 134, 148, 273, 42, 140, 106, 144, 124]}

 59%|█████▉    | 589997/1000000 [8:23:10<4:27:27, 25.55it/s]global step 590000, trans_decision ep_re 236.21943563982444

{"global_step": 590000, "eval_re": [46.44332352145461, 45.49468265746366, 
44.660992507284355, 317.6018085767297, 284.259363624627, 30.006413817678915, 
570.3604710720003, 302.33120770113493, 663.0719530945875, 57.964139825283496], 
"eval_len": [46, 46, 47, 139, 126, 35, 224, 135, 244, 52]}

 60%|█████▉    | 599997/1000000 [8:31:33<4:21:10, 25.53it/s]global step 600000, trans_decision ep_re 106.30408841654814

{"global_step": 600000, "eval_re": [27.11058797562444, 206.8653818587293, 
48.743218792364395, 28.39576208315096, 268.2835610986166, 30.22737015306662, 
338.8642691580668, 24.301135020582986, 26.371558539464246, 63.87803948581505], 
"eval_len": [30, 110, 48, 30, 124, 34, 149, 31, 34, 62]}

 61%|██████    | 609997/1000000 [8:40:20<4:13:23, 25.65it/s]global step 610000, trans_decision ep_re 42.51146106160867

{"global_step": 610000, "eval_re": [41.16835967554302, 70.96286452126468, 
35.76704115800761, 32.91883799820066, 46.798525571121836, 53.34302397445755, 
34.78811538407831, 43.554621994692866, 25.281093390548385, 40.53212694817176], 
"eval_len": [49, 85, 43, 38, 46, 50, 38, 46, 29, 44]}

 62%|██████▏   | 619999/1000000 [8:48:41<4:06:09, 25.73it/s]global step 620000, trans_decision ep_re 196.10209438893997

{"global_step": 620000, "eval_re": [111.60445501811904, 345.96900531075437, 
505.99425802156117, 121.74288995526217, 121.5771084867122, 131.18763474697116, 
223.56083345898264, 128.0647770666397, 138.7244819052494, 132.5954999191478], 
"eval_len": [75, 156, 213, 80, 78, 82, 122, 79, 91, 86]}

 63%|██████▎   | 629997/1000000 [8:57:30<3:59:59, 25.70it/s]global step 630000, trans_decision ep_re 132.46262416401598

{"global_step": 630000, "eval_re": [23.611664016714755, 52.34522974423587, 
42.40764054818999, 353.3125439669932, 358.93462526772663, 41.9798177015071, 
305.5911010339479, 39.89891865556231, 67.84245397875908, 38.70224672652312], 
"eval_len": [28, 50, 46, 151, 148, 46, 136, 43, 62, 42]}

 64%|██████▍   | 639999/1000000 [9:05:53<3:51:07, 25.96it/s]global step 640000, trans_decision ep_re 262.0602582250678

{"global_step": 640000, "eval_re": [197.57620604742854, 264.41576249441175, 
48.29900374773483, 277.81262567374023, 34.19374717161004, 40.09561372198629, 
655.8731899955434, 204.8362647404363, 363.81078138726195, 533.6893872705248], 
"eval_len": [99, 127, 52, 122, 39, 41, 243, 112, 153, 200]}

 65%|██████▍   | 649999/1000000 [9:14:31<3:46:24, 25.76it/s]global step 650000, trans_decision ep_re 332.9929212485557

{"global_step": 650000, "eval_re": [402.00422457833093, 366.6179612365747, 
258.26149513791813, 43.18450072467901, 687.9654476668245, 432.0097594335932, 
36.01550972980071, 54.65751334725499, 994.665035828279, 54.54776480230194], 
"eval_len": [163, 153, 118, 43, 255, 171, 43, 59, 325, 53]}

 66%|██████▌   | 659999/1000000 [9:23:10<3:40:40, 25.68it/s]global step 660000, trans_decision ep_re 221.30067771659697

{"global_step": 660000, "eval_re": [205.39255089799005, 36.98669724716093, 
51.62073543573767, 600.5645068281841, 185.0153319563638, 52.96902570549159, 
151.96761058250715, 338.6210149826496, 318.0367363359383, 271.8325671939466], 
"eval_len": [110, 42, 45, 215, 106, 56, 89, 148, 138, 128]}

 67%|██████▋   | 669997/1000000 [9:32:00<3:33:28, 25.76it/s]global step 670000, trans_decision ep_re 107.69324338994798

{"global_step": 670000, "eval_re": [30.61661877859371, 65.06490425720898, 
32.03680212554722, 213.42983173258008, 447.38399959553425, 153.74954477356718, 
25.862591765551382, 49.78177558941841, 28.55589058442237, 30.450474697056205], 
"eval_len": [32, 59, 35, 96, 173, 93, 29, 48, 36, 35]}

 68%|██████▊   | 679999/1000000 [9:40:40<3:26:38, 25.81it/s]global step 680000, trans_decision ep_re 377.86693626888956

{"global_step": 680000, "eval_re": [740.8984180039268, 491.5095166973319, 
438.8671366904028, 43.66489679745656, 108.25951966019956, 118.3951681749239, 
351.5596643163537, 149.67148215440545, 721.7394650955389, 614.1040950983562], 
"eval_len": [239, 177, 160, 44, 73, 80, 151, 86, 238, 223]}

 69%|██████▉   | 689998/1000000 [9:49:02<3:19:47, 25.86it/s]global step 690000, trans_decision ep_re 91.5345306837237

{"global_step": 690000, "eval_re": [38.09277416365553, 29.958651036356933, 
32.495414058131345, 275.7056459996171, 126.44254882330893, 36.019960503256556, 
24.03323947665001, 123.50722546283635, 194.39250238674944, 34.6973449266748], 
"eval_len": [38, 35, 38, 149, 89, 36, 31, 85, 105, 39]}

 70%|██████▉   | 699997/1000000 [9:57:50<3:14:29, 25.71it/s]global step 700000, trans_decision ep_re 183.63185002051446

{"global_step": 700000, "eval_re": [90.14150413032601, 57.818385347634724, 
208.18081466279997, 31.888362970979703, 19.73426907842029, 289.4429672889189, 
30.43394689117879, 104.32459841534401, 909.9335821742806, 94.42006924526143], 
"eval_len": [68, 59, 109, 39, 26, 130, 38, 79, 320, 81]}

 71%|███████   | 709997/1000000 [10:06:15<3:09:49, 25.46it/s]global step 710000, trans_decision ep_re 140.7458562432219

{"global_step": 710000, "eval_re": [67.20562421655845, 50.65496485458611, 
37.27703484261804, 29.374416584382278, 417.45337470589243, 471.1883690482039, 
205.21382118058526, 38.99621785798019, 32.508832827889336, 57.585906313522976], 
"eval_len": [58, 51, 40, 36, 169, 170, 107, 44, 38, 52]}

 72%|███████▏  | 719999/1000000 [10:14:50<3:02:47, 25.53it/s]global step 720000, trans_decision ep_re 147.3401698804882

{"global_step": 720000, "eval_re": [171.36167211066527, 38.831442427220054, 
159.11784651493244, 35.42866271249377, 315.3674064442508, 117.7670517156395, 
169.07354503232284, 53.577493758030855, 210.380906900852, 202.49567118847435], 
"eval_len": [104, 40, 92, 38, 146, 78, 94, 51, 108, 110]}

 73%|███████▎  | 729999/1000000 [10:23:40<2:54:53, 25.73it/s]global step 730000, trans_decision ep_re 381.33638075365235

{"global_step": 730000, "eval_re": [191.50216922983364, 438.5792324055627, 
695.5706940323754, 309.3982266716245, 61.33479868571857, 558.0755444995802, 
557.0631337247976, 465.09789970901863, 196.38282870722156, 340.3592798707907], 
"eval_len": [109, 171, 250, 135, 65, 206, 202, 175, 101, 150]}

 74%|███████▍  | 739998/1000000 [10:32:02<2:45:34, 26.17it/s]global step 740000, trans_decision ep_re 85.98090510060042

{"global_step": 740000, "eval_re": [42.821388202730134, 363.48140703904306, 
54.82823148991496, 55.70210747842853, 31.288886464535896, 56.26203743629133, 
74.53901528038186, 74.94791828403896, 53.97993838301029, 51.95812094762911], 
"eval_len": [45, 153, 52, 51, 37, 53, 66, 61, 52, 51]}

 75%|███████▍  | 749997/1000000 [10:40:50<2:40:57, 25.89it/s]global step 750000, trans_decision ep_re 157.20184987514887

{"global_step": 750000, "eval_re": [171.9733223695912, 64.22383227330823, 
361.0095495431039, 38.96384643926444, 157.8498175648067, 51.637801450326315, 
31.82929616737132, 33.175544382439156, 456.78366917214703, 204.57181938913047], 
"eval_len": [101, 60, 154, 40, 93, 49, 37, 39, 177, 104]}

 76%|███████▌  | 759999/1000000 [10:49:10<2:35:00, 25.80it/s]global step 760000, trans_decision ep_re 128.18098889707383

{"global_step": 760000, "eval_re": [35.363892285150875, 45.45085351810853, 
35.61066522936763, 689.594990467872, 34.26774173491927, 47.534327299507616, 
21.993719606934846, 304.32443661697323, 26.600264966617747, 41.0689972452864], 
"eval_len": [40, 41, 39, 238, 38, 47, 27, 136, 37, 39]}

 77%|███████▋  | 769999/1000000 [10:57:44<2:28:21, 25.84it/s]global step 770000, trans_decision ep_re 172.4253967704108

{"global_step": 770000, "eval_re": [375.9822143920652, 626.0469386241937, 
46.57609960962977, 30.934051485874086, 27.826885926603843, 51.427459621789524, 
414.2741713333111, 40.248612989732585, 34.84028918953063, 76.09724453137765], 
"eval_len": [156, 219, 47, 35, 34, 47, 161, 44, 40, 66]}

 78%|███████▊  | 779998/1000000 [11:06:30<2:20:56, 26.02it/s]global step 780000, trans_decision ep_re 84.48177799140085

{"global_step": 780000, "eval_re": [34.91130375153822, 40.51660657572095, 
32.09293476400871, 347.775458025557, 38.15046101047361, 34.95067694458768, 
203.88128215230162, 28.892934826980035, 41.986244160129246, 41.65987770271159], 
"eval_len": [40, 40, 37, 146, 40, 40, 105, 36, 42, 42]}

 79%|███████▉  | 789998/1000000 [11:14:52<2:13:15, 26.27it/s]global step 790000, trans_decision ep_re 124.24911426896162

{"global_step": 790000, "eval_re": [38.58775838425394, 33.10375300657409, 
185.25638147578445, 45.6182781355509, 28.054877043069446, 36.07674142259345, 
607.8142193078288, 41.413342079954624, 189.8565819200889, 36.709209913917725], 
"eval_len": [40, 38, 95, 49, 35, 41, 222, 42, 99, 39]}

 80%|███████▉  | 799997/1000000 [11:23:40<2:08:15, 25.99it/s]global step 800000, trans_decision ep_re 89.76470847475932

{"global_step": 800000, "eval_re": [126.0145253778586, 36.99605816461937, 
39.83995366238299, 470.58971801090183, 47.44714944938287, 29.746583387681994, 
30.863341457634597, 46.65144011493566, 35.60555718552347, 33.89275793667179], 
"eval_len": [91, 40, 41, 175, 44, 39, 38, 48, 40, 39]}

 81%|████████  | 809999/1000000 [11:32:01<2:02:39, 25.82it/s]global step 810000, trans_decision ep_re 48.34877318514593

{"global_step": 810000, "eval_re": [31.632692746953907, 35.76028458520526, 
40.98507514492479, 35.920307128448975, 173.4683232106009, 35.27110186699549, 
24.43440306391497, 42.77882604767084, 32.63894923924225, 30.597768817501887], 
"eval_len": [34, 38, 43, 39, 92, 37, 35, 43, 34, 34]}

 82%|████████▏ | 819998/1000000 [11:40:50<1:55:24, 26.00it/s]global step 820000, trans_decision ep_re 90.27466303935678

{"global_step": 820000, "eval_re": [31.163294991304625, 30.845049188372855, 
35.884041573665414, 26.300341794633347, 23.7550094248805, 265.8283444212554, 
38.74465545608892, 32.23076212795583, 378.36183746416765, 39.63329395124331], 
"eval_len": [37, 36, 38, 31, 32, 122, 41, 37, 157, 40]}

 83%|████████▎ | 829999/1000000 [11:49:20<1:50:29, 25.64it/s]global step 830000, trans_decision ep_re 35.78785539377858

{"global_step": 830000, "eval_re": [29.791157573254498, 29.284438769391674, 
34.26619840461417, 29.60250158708967, 50.90055672941947, 41.47674632493978, 
49.72347753767569, 31.883890767379164, 31.562421260145946, 29.387164983875767], 
"eval_len": [34, 34, 38, 37, 48, 45, 49, 35, 37, 31]}

 84%|████████▍ | 839997/1000000 [11:57:43<1:42:55, 25.91it/s]global step 840000, trans_decision ep_re 92.46112808908165

{"global_step": 840000, "eval_re": [35.35349468531269, 456.96974685684245, 
55.529933219749495, 42.94374715207987, 48.95871431145918, 32.36823232771147, 
38.35892313951309, 36.51888504804932, 28.13107598401586, 149.47852816608304], 
"eval_len": [39, 174, 53, 42, 44, 38, 40, 40, 31, 92]}

 85%|████████▍ | 849997/1000000 [12:06:30<1:36:04, 26.02it/s]global step 850000, trans_decision ep_re 103.8421264909909

{"global_step": 850000, "eval_re": [43.14358515010949, 44.74981287736331, 
25.017833443542973, 259.3833466665554, 56.158116287445765, 25.667964650892163, 
42.15643838532393, 432.258206302508, 32.077544964883266, 77.80841618128488], 
"eval_len": [44, 46, 31, 129, 55, 31, 41, 168, 32, 64]}

 86%|████████▌ | 859998/1000000 [12:15:00<1:28:24, 26.39it/s]global step 860000, trans_decision ep_re 48.89505974240422

{"global_step": 860000, "eval_re": [23.934079120108027, 33.61421065707928, 
28.76903360523949, 41.16378923381327, 201.2601858497344, 41.6026552482818, 
31.40620326980142, 28.032132614869475, 27.534504812298977, 31.633803012816088], 
"eval_len": [30, 36, 35, 40, 102, 41, 37, 33, 35, 35]}

 87%|████████▋ | 869999/1000000 [12:23:30<1:23:59, 25.80it/s]global step 870000, trans_decision ep_re 981.9167035170434

{"global_step": 870000, "eval_re": [1000.0029549318907, 1001.9335202770212, 
1010.7227058562706, 763.3138931545521, 1001.8889860263369, 983.167017480212, 
1021.3971020443646, 1010.2927236151523, 999.2080344063348, 1027.240097378298], 
"eval_len": [1000, 1000, 1000, 789, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879998/1000000 [12:32:26<1:16:18, 26.21it/s]global step 880000, trans_decision ep_re 61.61696192088368

{"global_step": 880000, "eval_re": [83.32778868828898, 23.918485695099765, 
50.9265113346411, 54.24500070542665, 88.64653648158463, 52.3425629897793, 
56.998455571022994, 69.76577169077325, 80.88480825781821, 55.11369779440185], 
"eval_len": [63, 27, 44, 45, 64, 46, 49, 57, 55, 47]}

 89%|████████▉ | 889999/1000000 [12:41:01<1:10:31, 26.00it/s]global step 890000, trans_decision ep_re 191.20350499689826

{"global_step": 890000, "eval_re": [177.5956517101539, 183.34814619974682, 
757.4544800248118, 177.8614170528909, 146.22306136815567, 43.035432448935275, 
35.99057472541047, 30.843172498782284, 316.4791824192501, 43.20393152084566], 
"eval_len": [102, 99, 243, 100, 94, 46, 38, 36, 134, 43]}

 90%|████████▉ | 899999/1000000 [12:49:34<1:03:55, 26.07it/s]global step 900000, trans_decision ep_re 35.80460810330594

{"global_step": 900000, "eval_re": [34.88095606676864, 32.59598818058161, 
33.43846182894606, 28.856046452913283, 47.540885387431146, 29.245233353034482, 
48.54697412249174, 35.564144900701166, 36.41524801500254, 30.962142725188706], 
"eval_len": [40, 39, 36, 35, 48, 37, 45, 38, 36, 35]}

 91%|█████████ | 909998/1000000 [12:58:04<57:39, 26.01it/s]global step 910000, trans_decision ep_re 43.80787335249628

{"global_step": 910000, "eval_re": [35.69558947476039, 70.1540117060596, 
69.11888338777662, 38.01966683987598, 34.598093279534915, 33.137455807094796, 
63.43118057261137, 34.54331026403833, 26.70951997617901, 32.671022217031755], 
"eval_len": [36, 62, 60, 37, 37, 35, 54, 38, 35, 38]}

 92%|█████████▏| 919998/1000000 [13:06:34<51:07, 26.08it/s]global step 920000, trans_decision ep_re 90.6070285011133

{"global_step": 920000, "eval_re": [24.151019083962545, 44.81063713120705, 
29.096796362428925, 29.912424531084994, 28.49396492400952, 629.4339454833952, 
29.20296041833232, 25.40768025113887, 26.443730541763916, 39.11712628380964], 
"eval_len": [33, 42, 32, 32, 31, 242, 34, 31, 32, 39]}

 93%|█████████▎| 929998/1000000 [13:15:04<44:14, 26.37it/s]global step 930000, trans_decision ep_re 30.957105643770205

{"global_step": 930000, "eval_re": [35.83667176281516, 30.990655776325276, 
27.76575882812451, 25.580936197701718, 34.02646008929455, 27.73039330164372, 
32.194539613382794, 36.05282905336069, 31.462765264041053, 27.930046551012516], 
"eval_len": [37, 36, 33, 29, 36, 32, 35, 38, 36, 33]}

 94%|█████████▍| 939999/1000000 [13:23:33<38:27, 26.00it/s]global step 940000, trans_decision ep_re 107.536888931957

{"global_step": 940000, "eval_re": [41.12890654793049, 33.39844615208001, 
29.632045812140987, 40.79312985309279, 35.303780253205346, 27.612435192005545, 
46.95020300955137, 25.198401226991567, 32.73992444201443, 762.6116168305575], 
"eval_len": [39, 38, 31, 41, 36, 33, 48, 29, 37, 248]}

 95%|█████████▍| 949998/1000000 [13:32:04<31:37, 26.35it/s]global step 950000, trans_decision ep_re 68.06630249527895

{"global_step": 950000, "eval_re": [42.17330581718804, 34.13377718634084, 
69.15991996734626, 64.10434476756232, 275.3384000076077, 38.80953304313021, 
36.40732446615014, 33.50663327932072, 56.216973209582974, 30.81281320856026], 
"eval_len": [43, 37, 65, 58, 122, 40, 39, 35, 56, 37]}

 96%|█████████▌| 959998/1000000 [13:40:35<25:12, 26.45it/s]global step 960000, trans_decision ep_re 51.15895165142858

{"global_step": 960000, "eval_re": [25.646958361839488, 25.592343519466223, 
279.5015307905201, 24.526732152462774, 24.254613718164386, 31.48989626477575, 
25.477313510493097, 27.577763796861664, 23.575359876887482, 23.947004522814726],
"eval_len": [38, 38, 126, 39, 38, 37, 38, 37, 38, 38]}

 97%|█████████▋| 969997/1000000 [13:49:05<19:17, 25.91it/s]global step 970000, trans_decision ep_re 25.18394325033978

{"global_step": 970000, "eval_re": [26.38097430770322, 21.83618435257343, 
25.10472637732993, 26.648159322322613, 24.85682105277204, 22.068619359943774, 
24.247695851748926, 26.282590467541844, 25.71735723128368, 28.696304180178384], 
"eval_len": [36, 36, 36, 36, 36, 35, 36, 36, 37, 36]}

 98%|█████████▊| 979998/1000000 [13:57:50<12:46, 26.10it/s]global step 980000, trans_decision ep_re 315.5657421889508

{"global_step": 980000, "eval_re": [536.8720936245119, 301.2184730488666, 
244.04590915974754, 338.3348302937816, 356.58844779571376, 291.00764575468696, 
765.2769399034083, 154.30899221920774, 18.24101645455389, 149.76307363502912], 
"eval_len": [522, 284, 232, 314, 337, 268, 748, 140, 21, 134]}

 99%|█████████▉| 989997/1000000 [14:06:12<06:21, 26.19it/s]global step 990000, trans_decision ep_re 24.98969540853736

{"global_step": 990000, "eval_re": [24.921130507847913, 20.90847030469692, 
23.418894609167758, 27.17940496516762, 23.254270466751006, 24.5741501374065, 
28.73141189958759, 28.23950085703408, 26.462304853647975, 22.20741548406625], 
"eval_len": [33, 33, 34, 33, 33, 32, 34, 35, 33, 33]}

100%|█████████▉| 999998/1000000 [14:14:42<00:00, 26.40it/s]global step 1000000, trans_decision ep_re 27.26447500544729

{"global_step": 1000000, "eval_re": [23.201665568616843, 30.088809265086716, 
27.098590463197997, 26.390797524400053, 29.07999492679928, 28.549483946144093, 
26.014936349781387, 28.196731430197392, 26.147862766650327, 27.87587781359885], 
"eval_len": [35, 33, 36, 34, 36, 34, 35, 35, 35, 36]}

100%|██████████| 1000000/1000000 [14:14:56<00:00, 19.49it/s]
