
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.25
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [05:30<13:08:12, 20.93it/s]global step 10000, trans_decision ep_re 37.9434657829838

{"global_step": 10000, "eval_re": [7.778332526884677, 18.353446379051512, 
85.11127991786226, 23.120664019869274, 9.974377541344031, 76.05539283964595, 
12.698294612701009, 46.54237717248486, 10.449602076108292, 89.35089074388615], 
"eval_len": [12, 18, 71, 21, 17, 44, 25, 41, 14, 49]}

  2%|▏         | 19999/1000000 [16:02<12:54:47, 21.08it/s]global step 20000, trans_decision ep_re 16.874338807036224

{"global_step": 20000, "eval_re": [10.945717256565873, 11.441390343464496, 
9.61069491731694, 17.879113925974465, 33.321624946851315, 13.809742891544769, 
16.991074201288967, 20.291560743367985, 19.3540361285647, 15.098432715422748], 
"eval_len": [16, 17, 12, 21, 39, 17, 19, 27, 19, 22]}

  3%|▎         | 29998/1000000 [27:00<12:38:44, 21.31it/s]global step 30000, trans_decision ep_re 18.03430762431479

{"global_step": 30000, "eval_re": [23.564920485410926, 9.496274831484273, 
20.310852406574583, 19.137200230043298, 9.969252934720593, 38.437193492501684, 
11.932752300939839, 11.922048339707613, 8.779976828472401, 26.79260439329267], 
"eval_len": [25, 15, 25, 28, 20, 30, 15, 14, 11, 26]}

  4%|▍         | 39999/1000000 [37:50<12:55:38, 20.63it/s]global step 40000, trans_decision ep_re 22.64016843053836

{"global_step": 40000, "eval_re": [16.757071910276565, 19.893731131754183, 
16.711596056820134, 18.032781493701176, 9.402737132154543, 74.0336556756513, 
12.110236400829448, 10.675898842372039, 33.02808707404397, 15.755888587780266], 
"eval_len": [20, 23, 30, 22, 11, 47, 22, 18, 31, 23]}

  5%|▍         | 49998/1000000 [48:30<12:53:40, 20.47it/s]global step 50000, trans_decision ep_re 43.766039589770045

{"global_step": 50000, "eval_re": [7.046410208573077, 161.3897255221779, 
25.155805885892505, 18.53720313885253, 15.146745395251749, 15.073910041865732, 
8.972123824453995, 16.81765579126443, 100.76524134078132, 68.75557474858729], 
"eval_len": [10, 86, 30, 18, 16, 21, 30, 26, 73, 89]}

  6%|▌         | 59999/1000000 [59:00<12:36:53, 20.70it/s]global step 60000, trans_decision ep_re 15.858712385234606

{"global_step": 60000, "eval_re": [13.49260824813825, 21.62413103769766, 
17.846440501739455, 25.763473096688283, 13.444424564856035, 10.11149293900833, 
16.95842330272402, 12.103539012221427, 16.980232187801096, 10.262358961471492], 
"eval_len": [15, 24, 22, 32, 14, 12, 27, 16, 21, 15]}

  7%|▋         | 69998/1000000 [1:10:00<12:12:54, 21.15it/s]global step 70000, trans_decision ep_re 47.37506914861986

{"global_step": 70000, "eval_re": [105.47606699651493, 21.792282885599306, 
8.641133856812935, 9.830034444636699, 11.953198802876289, 7.931670648542704, 
104.31200432259712, 99.06075636322127, 16.48936079393045, 88.26418237146686], 
"eval_len": [91, 22, 11, 15, 19, 11, 93, 76, 24, 63]}

  8%|▊         | 79997/1000000 [1:20:40<12:11:39, 20.96it/s]global step 80000, trans_decision ep_re 19.90592776635632

{"global_step": 80000, "eval_re": [64.93390222451333, 13.335712935407205, 
18.603163582019288, 12.503252050945, 11.841069672345025, 15.934710421005839, 
10.411263666723178, 15.089281229696903, 21.50502350286444, 14.901898378043], 
"eval_len": [54, 17, 20, 23, 22, 18, 17, 28, 22, 17]}

  9%|▉         | 89998/1000000 [1:31:20<12:12:06, 20.72it/s]global step 90000, trans_decision ep_re 41.19635030909533

{"global_step": 90000, "eval_re": [11.80020309264471, 7.676136083242052, 
15.107952815190655, 11.374755416624492, 18.062489726021905, 150.55118819616754, 
18.519312189385104, 8.168976945465365, 21.017129280920674, 149.68535934529075], 
"eval_len": [15, 11, 16, 14, 24, 93, 19, 11, 26, 90]}

 10%|▉         | 99998/1000000 [1:42:00<11:44:38, 21.29it/s]global step 100000, trans_decision ep_re 45.39226180933531

{"global_step": 100000, "eval_re": [192.67800532112548, 28.647231040955358, 
12.369829307794046, 15.357343835419085, 17.822566735967687, 10.62957149097952, 
80.22163385515451, 11.074239010181266, 78.65434559501058, 6.467851900765609], 
"eval_len": [93, 34, 34, 17, 19, 14, 57, 18, 73, 11]}

 11%|█         | 109999/1000000 [1:52:40<11:50:46, 20.87it/s]global step 110000, trans_decision ep_re 17.826438853425735

{"global_step": 110000, "eval_re": [8.799730830287844, 13.12204964331539, 
36.28639707793393, 20.111940775299956, 15.930262215655869, 20.516683822946582, 
8.845251931190184, 14.576096763036235, 14.416174501438544, 25.659800973152823], 
"eval_len": [13, 23, 31, 32, 21, 25, 16, 19, 28, 26]}

 12%|█▏        | 119998/1000000 [2:03:20<11:26:26, 21.37it/s]global step 120000, trans_decision ep_re 24.227143296915667

{"global_step": 120000, "eval_re": [86.114145593189, 12.48626853916261, 
10.194479710058193, 23.38103831588319, 24.158273774480854, 18.97008939117845, 
14.76146703813074, 9.20373802345849, 22.872889957627994, 20.129042625987164], 
"eval_len": [70, 17, 20, 22, 43, 19, 19, 11, 29, 26]}

 13%|█▎        | 129997/1000000 [2:14:00<11:40:53, 20.69it/s]global step 130000, trans_decision ep_re 24.771779563381806

{"global_step": 130000, "eval_re": [14.942685907475711, 11.383247135532407, 
18.12098662898796, 28.720431072145125, 12.755002183853023, 9.331870913618815, 
110.25167017139832, 9.714777708808036, 22.40502103884851, 10.092102873150179], 
"eval_len": [19, 16, 28, 35, 21, 14, 69, 13, 22, 12]}

 14%|█▍        | 139998/1000000 [2:24:40<11:14:07, 21.26it/s]global step 140000, trans_decision ep_re 16.880068446456125

{"global_step": 140000, "eval_re": [18.30587514803594, 14.357433227487517, 
16.53726242117119, 25.717480383100213, 33.3334067078403, 12.77607843658572, 
12.578342165603837, 13.106304931752486, 6.509985938684196, 15.578515104299838], 
"eval_len": [34, 16, 19, 26, 36, 20, 14, 20, 12, 23]}

 15%|█▍        | 149997/1000000 [2:35:20<11:51:59, 19.90it/s]global step 150000, trans_decision ep_re 16.63446707167491

{"global_step": 150000, "eval_re": [10.66661811975684, 12.110431049234604, 
13.153072300337662, 18.606679799139993, 53.711556743950965, 13.002789191515568, 
6.740093408417503, 14.840676375534054, 9.05134322452402, 14.461410504337865], 
"eval_len": [17, 15, 24, 27, 53, 20, 10, 16, 15, 18]}

 16%|█▌        | 159999/1000000 [2:45:51<11:20:15, 20.58it/s]global step 160000, trans_decision ep_re 21.538870599874873

{"global_step": 160000, "eval_re": [13.551281324933425, 14.768083682566623, 
13.671855992872597, 12.690085708259518, 20.36110335006859, 12.013143302423227, 
81.35284203344999, 15.701915491291091, 11.464502505261741, 19.81389260762189], 
"eval_len": [15, 19, 16, 14, 21, 23, 56, 25, 21, 35]}

 17%|█▋        | 169999/1000000 [2:56:32<11:05:30, 20.79it/s]global step 170000, trans_decision ep_re 22.770643775786816

{"global_step": 170000, "eval_re": [14.843101753801308, 22.526502333181714, 
9.132650568721925, 13.003494806327625, 10.353046301267215, 21.199332194699238, 
11.601586486047584, 11.258623340268683, 9.338558081054154, 104.44954189249869], 
"eval_len": [30, 30, 12, 17, 13, 24, 29, 14, 16, 66]}

 18%|█▊        | 179999/1000000 [3:07:30<10:57:13, 20.79it/s]global step 180000, trans_decision ep_re 25.767298318481785

{"global_step": 180000, "eval_re": [18.13314522205857, 19.021815567555674, 
11.300414765489442, 15.636953864601432, 10.957973745294035, 103.11820734419122, 
24.198171916986205, 19.55535255956525, 9.47290400349344, 26.278044195582535], 
"eval_len": [22, 26, 15, 17, 18, 113, 32, 36, 12, 30]}

 19%|█▉        | 189998/1000000 [3:18:10<10:55:14, 20.60it/s]global step 190000, trans_decision ep_re 14.303965429991056

{"global_step": 190000, "eval_re": [20.03841801447495, 12.105828830389381, 
18.634679248128577, 9.819325847416954, 11.381248279253256, 13.807901120862903, 
9.591826872152463, 13.812034144280178, 12.53281112517038, 21.3155808177815], 
"eval_len": [28, 18, 19, 13, 23, 28, 11, 20, 16, 19]}

 20%|█▉        | 199998/1000000 [3:28:50<10:29:44, 21.17it/s]global step 200000, trans_decision ep_re 18.33055987234749

{"global_step": 200000, "eval_re": [20.061918282830867, 25.239488412200018, 
6.070997793533066, 29.89694682714912, 21.553721650297195, 14.099818492581278, 
10.782619646612805, 26.075525765698732, 17.396547010176125, 12.128014842395686],
"eval_len": [19, 24, 9, 32, 26, 19, 15, 23, 25, 13]}

 21%|██        | 209998/1000000 [3:39:30<10:22:57, 21.14it/s]global step 210000, trans_decision ep_re 30.67042119233641

{"global_step": 210000, "eval_re": [7.077646678865015, 8.374230226266898, 
90.30348226740921, 77.06833801738536, 17.42405873781675, 13.635908590500799, 
23.498194309933883, 26.19797584527677, 8.00579997381919, 35.118577276090214], 
"eval_len": [10, 17, 67, 65, 18, 24, 32, 24, 10, 30]}

 22%|██▏       | 219997/1000000 [3:50:10<10:29:20, 20.66it/s]global step 220000, trans_decision ep_re 24.92289966407942

{"global_step": 220000, "eval_re": [15.073405265866311, 11.48693063059935, 
15.912885752839918, 11.023750772962009, 19.520103585012627, 122.5353955018626, 
12.428476287719299, 10.919117087502887, 17.660220989768625, 12.66871076666062], 
"eval_len": [19, 15, 28, 18, 30, 111, 18, 15, 18, 14]}

 23%|██▎       | 229997/1000000 [4:00:50<10:03:30, 21.26it/s]global step 230000, trans_decision ep_re 15.466916675089635

{"global_step": 230000, "eval_re": [26.520695385688356, 21.056763486386178, 
10.334918960814198, 15.072149022350143, 10.799902430277193, 14.938694959408524, 
26.883055431734405, 7.732347787715208, 12.264425633304215, 9.066213653217952], 
"eval_len": [24, 26, 14, 16, 13, 15, 23, 12, 17, 11]}

 24%|██▍       | 239999/1000000 [4:11:10<10:12:06, 20.69it/s]global step 240000, trans_decision ep_re 26.16822412511535

{"global_step": 240000, "eval_re": [10.585828081150522, 7.543893455640681, 
10.278002737825972, 37.00901019309279, 7.927660425388327, 27.375532862297028, 
9.990249433132576, 106.37494364243936, 21.550194747866083, 23.046925672320175], 
"eval_len": [16, 11, 13, 35, 37, 25, 12, 71, 25, 37]}

 25%|██▍       | 249998/1000000 [4:22:00<9:46:09, 21.33it/s]global step 250000, trans_decision ep_re 31.102418998478562

{"global_step": 250000, "eval_re": [23.55569549990033, 29.0493723949086, 
9.256135223831832, 18.86507396828218, 79.9957652670374, 17.213728309257455, 
15.407595180652853, 27.986295790978605, 9.861522480097433, 79.83300586983896], 
"eval_len": [34, 25, 13, 27, 53, 19, 24, 28, 13, 58]}

 26%|██▌       | 259999/1000000 [4:32:40<9:43:00, 21.15it/s]global step 260000, trans_decision ep_re 31.671845953736657

{"global_step": 260000, "eval_re": [12.473068840770182, 84.80379355350132, 
10.92565962410096, 19.687403727215322, 9.728625309604311, 31.84207000836321, 
91.09817949759103, 27.879416105879052, 19.257632220384494, 9.022610649956656], 
"eval_len": [20, 73, 13, 28, 12, 36, 78, 35, 28, 17]}

 27%|██▋       | 269999/1000000 [4:43:10<9:33:59, 21.20it/s]global step 270000, trans_decision ep_re 20.441326413118485

{"global_step": 270000, "eval_re": [24.70987291215249, 72.93860990938653, 
17.242609920534424, 7.203201108089813, 15.325983611393807, 15.63067922871389, 
16.844309397993705, 10.350170289816885, 9.762273991216755, 14.405553761886578], 
"eval_len": [27, 93, 24, 19, 17, 23, 23, 18, 14, 15]}

 28%|██▊       | 279998/1000000 [4:53:50<9:20:39, 21.40it/s]global step 280000, trans_decision ep_re 33.42163194734723

{"global_step": 280000, "eval_re": [23.12101339664953, 18.73485837560693, 
12.265116993471997, 96.59184975294637, 9.587090886020526, 59.361637221499976, 
13.881301036752362, 24.551397114549975, 65.8079536022902, 10.314101093684458], 
"eval_len": [28, 19, 25, 83, 17, 69, 19, 21, 56, 14]}

 29%|██▉       | 289999/1000000 [5:04:12<9:21:55, 21.06it/s]global step 290000, trans_decision ep_re 36.28474811051351

{"global_step": 290000, "eval_re": [17.038166545730192, 28.284105618816007, 
13.17715007121438, 12.0064540020712, 6.628104436076352, 99.95254732705675, 
28.86780335967341, 132.43907891771963, 11.935319372728847, 12.51875145404834], 
"eval_len": [19, 27, 21, 17, 17, 99, 47, 75, 25, 22]}

 30%|██▉       | 299999/1000000 [5:15:00<9:21:16, 20.79it/s]global step 300000, trans_decision ep_re 34.92618212840307

{"global_step": 300000, "eval_re": [18.954834401278962, 24.797421724868208, 
7.264943106049008, 10.253223782016837, 12.49150103090543, 23.976729453157553, 
14.494130781547563, 216.10636828597637, 11.437599642787108, 9.485069075443663], 
"eval_len": [22, 30, 10, 13, 21, 35, 18, 122, 28, 16]}

 31%|███       | 309998/1000000 [5:25:40<8:57:51, 21.38it/s]global step 310000, trans_decision ep_re 52.13676017642697

{"global_step": 310000, "eval_re": [11.626050840167089, 80.06951378231506, 
8.1312974765171, 8.14908381712657, 9.939253016349634, 19.665785786218784, 
11.566739246635649, 140.93431675000642, 209.22381242716057, 22.061748621772782],
"eval_len": [13, 54, 13, 28, 12, 20, 17, 90, 190, 27]}

 32%|███▏      | 319997/1000000 [5:36:20<9:08:26, 20.66it/s]global step 320000, trans_decision ep_re 51.81590187385981

{"global_step": 320000, "eval_re": [94.57219714370707, 16.782317969379836, 
12.723099263135511, 17.92199552474889, 23.055799663080396, 116.45272578280095, 
75.25667527680534, 59.13691638744319, 21.269234248188766, 80.98805747930813], 
"eval_len": [53, 19, 20, 20, 35, 73, 49, 57, 32, 55]}

 33%|███▎      | 329998/1000000 [5:47:00<8:51:08, 21.02it/s]global step 330000, trans_decision ep_re 12.853600116461072

{"global_step": 330000, "eval_re": [21.191717360990033, 5.280700870048961, 
13.704044417623674, 12.212834738429523, 12.166489238656624, 12.754447104519492, 
12.77226743735067, 14.099249015985604, 8.079231391261095, 16.275019589745057], 
"eval_len": [31, 8, 28, 16, 20, 34, 19, 19, 11, 18]}

 34%|███▍      | 339999/1000000 [5:57:21<8:41:25, 21.10it/s]global step 340000, trans_decision ep_re 53.03462513653177

{"global_step": 340000, "eval_re": [10.863942336188158, 113.41628919719176, 
6.7428684015088205, 22.942417802408748, 29.84707889259448, 31.55057415494512, 
101.60077232512627, 143.47510320528272, 37.02428599321587, 32.88291905685571], 
"eval_len": [20, 85, 15, 32, 32, 27, 85, 99, 33, 27]}

 35%|███▍      | 349999/1000000 [6:08:01<8:44:18, 20.66it/s]global step 350000, trans_decision ep_re 33.03187802836193

{"global_step": 350000, "eval_re": [23.03471417820598, 9.369169266124182, 
131.15925341476972, 9.88930386582699, 8.862818283565746, 14.42435256112152, 
12.895986478153386, 21.485861540966418, 23.072399250487805, 76.12492144439749], 
"eval_len": [26, 23, 86, 14, 24, 21, 18, 25, 23, 66]}

 36%|███▌      | 359999/1000000 [6:18:50<8:29:48, 20.92it/s]global step 360000, trans_decision ep_re 35.358347794799236

{"global_step": 360000, "eval_re": [11.088681822239616, 121.59382599974205, 
25.57990428161249, 7.09790930252638, 8.172644185886261, 130.68456929032158, 
17.056456064914723, 9.674644092483607, 12.73650240112373, 9.898340507141935], 
"eval_len": [23, 72, 27, 30, 10, 74, 24, 37, 17, 17]}

 37%|███▋      | 369998/1000000 [6:29:30<8:06:33, 21.58it/s]global step 370000, trans_decision ep_re 24.76617686761157

{"global_step": 370000, "eval_re": [7.910565120246503, 12.736271245275907, 
12.957559507534949, 9.37859314983832, 17.20244206672453, 23.28945741040632, 
8.98997464315529, 109.90893803914066, 21.746296918685562, 23.54167057510764], 
"eval_len": [10, 19, 15, 17, 23, 22, 13, 70, 28, 21]}

 38%|███▊      | 379999/1000000 [6:40:00<8:04:53, 21.31it/s]global step 380000, trans_decision ep_re 13.970342067728785

{"global_step": 380000, "eval_re": [7.8308488869457875, 19.23556096900449, 
6.9334618070512555, 36.61827116686608, 10.145916108585718, 21.265070215726453, 
10.79162894621085, 8.279751405155341, 9.15152488961686, 9.45138628212501], 
"eval_len": [24, 24, 12, 29, 19, 20, 19, 12, 16, 15]}

 39%|███▉      | 389998/1000000 [6:50:40<7:50:51, 21.59it/s]global step 390000, trans_decision ep_re 21.07325932132496

{"global_step": 390000, "eval_re": [9.337951077009468, 10.04357216342986, 
13.139494584466131, 21.04831064024471, 9.693833046094154, 13.893727604120155, 
15.729941704855376, 79.7051757045113, 21.23806437038935, 16.90252231812913], 
"eval_len": [17, 14, 17, 20, 23, 16, 22, 64, 28, 18]}

 40%|███▉      | 399998/1000000 [7:01:10<7:37:55, 21.84it/s]global step 400000, trans_decision ep_re 20.0209950236108

{"global_step": 400000, "eval_re": [20.145892561017096, 76.05471864154481, 
14.019033141248707, 17.75817684985615, 7.297423598122862, 11.23051598253623, 
10.789666726821382, 12.643919181669983, 15.657671832280124, 14.612931721010664],
"eval_len": [22, 49, 24, 21, 10, 13, 21, 18, 18, 31]}

 41%|████      | 409999/1000000 [7:11:40<7:41:35, 21.30it/s]global step 410000, trans_decision ep_re 39.43212420092245

{"global_step": 410000, "eval_re": [22.904532719666193, 9.488729711530803, 
140.4221905119439, 115.80278506902762, 36.206477756788516, 13.474434200558575, 
20.030869072536923, 7.5486660616341466, 7.455694184501863, 20.986862721035973], 
"eval_len": [22, 19, 93, 88, 50, 27, 24, 11, 12, 21]}

 42%|████▏     | 419998/1000000 [7:22:10<7:36:11, 21.19it/s]global step 420000, trans_decision ep_re 26.522884947505172

{"global_step": 420000, "eval_re": [17.93900303006822, 12.382419095089707, 
15.378807762696235, 25.901039112133333, 34.76467819704871, 12.809136857664026, 
17.441420544428066, 85.60410775626403, 23.512575832683943, 19.49566128697543], 
"eval_len": [26, 16, 28, 36, 28, 31, 32, 72, 24, 19]}

 43%|████▎     | 429999/1000000 [7:32:21<7:30:15, 21.10it/s]global step 430000, trans_decision ep_re 47.52372789855104

{"global_step": 430000, "eval_re": [21.8770602925738, 12.262117252057978, 
10.382651126470613, 145.22716788261187, 12.015252003653302, 16.089733244476772, 
13.759320624712853, 205.99596865295374, 13.824836457052305, 23.80317144894718], 
"eval_len": [29, 14, 16, 98, 16, 22, 15, 99, 20, 26]}

 44%|████▍     | 439999/1000000 [7:43:00<7:14:46, 21.47it/s]global step 440000, trans_decision ep_re 16.822241523487236

{"global_step": 440000, "eval_re": [12.59515531888603, 12.130965035859864, 
29.685264456173222, 21.60181817767534, 13.166248628990308, 12.577556265911602, 
15.20941618030679, 19.111909833788268, 16.493779467584048, 15.650301869696873], 
"eval_len": [14, 18, 32, 22, 20, 21, 16, 22, 19, 24]}

 45%|████▍     | 449998/1000000 [7:53:30<7:06:58, 21.47it/s]global step 450000, trans_decision ep_re 55.38250002250133

{"global_step": 450000, "eval_re": [19.875654028222034, 301.3760445632762, 
14.35602122070363, 11.977722948751513, 10.756149462681906, 23.13531593867385, 
12.144299760216102, 29.603244281740533, 95.29902421519303, 35.3015238055545], 
"eval_len": [27, 150, 22, 16, 19, 28, 17, 25, 72, 30]}

 46%|████▌     | 459998/1000000 [8:04:00<7:01:27, 21.35it/s]global step 460000, trans_decision ep_re 12.51920554120662

{"global_step": 460000, "eval_re": [24.420373482419397, 13.880447220018901, 
5.491679041730436, 11.385643880328143, 9.03235349989512, 9.171767246169011, 
11.159327387622147, 13.82391620196006, 15.694869282081054, 11.131678169841912], 
"eval_len": [29, 16, 9, 15, 15, 14, 24, 22, 19, 14]}

 47%|████▋     | 469999/1000000 [8:14:10<6:48:25, 21.63it/s]global step 470000, trans_decision ep_re 36.2872245604395

{"global_step": 470000, "eval_re": [27.20492342438839, 16.92360049609183, 
27.148122894709847, 30.734808565672047, 11.566919935590747, 12.568800321197028, 
11.11242862367966, 17.84637763053316, 187.63612456154573, 20.130139150986537], 
"eval_len": [24, 26, 25, 28, 14, 17, 28, 17, 86, 18]}

 48%|████▊     | 479997/1000000 [8:24:50<6:41:23, 21.59it/s]global step 480000, trans_decision ep_re 20.973457477748855

{"global_step": 480000, "eval_re": [11.584963149238392, 16.04044825910328, 
7.919644352520556, 6.486354302313558, 92.15291053014587, 11.214817062396728, 
19.17867921583254, 12.55238455612414, 21.92575073574633, 10.67862261406718], 
"eval_len": [14, 25, 28, 9, 69, 18, 17, 14, 19, 13]}

 49%|████▉     | 489999/1000000 [8:35:01<6:31:22, 21.72it/s]global step 490000, trans_decision ep_re 22.42067612164349

{"global_step": 490000, "eval_re": [19.79199281144396, 37.67405472046716, 
22.37612436899729, 17.112723476639033, 8.619090829274054, 10.194033313869825, 
8.596986983841914, 11.316411493773943, 65.51434280489582, 23.0110004132319], 
"eval_len": [24, 41, 41, 19, 12, 13, 18, 16, 41, 28]}

 50%|████▉     | 499999/1000000 [8:45:40<6:36:21, 21.02it/s]global step 500000, trans_decision ep_re 32.06777013701391

{"global_step": 500000, "eval_re": [15.561911496431781, 91.19447130868133, 
14.96349070338473, 9.785186125132068, 26.953412136353695, 10.593586207962058, 
12.212760897559825, 116.2839099429611, 9.220624436638326, 13.908348115034112], 
"eval_len": [20, 82, 33, 16, 23, 15, 14, 70, 16, 15]}

 51%|█████     | 509999/1000000 [8:56:10<6:25:44, 21.17it/s]global step 510000, trans_decision ep_re 28.201503722809605

{"global_step": 510000, "eval_re": [49.13304581805128, 9.061078133809206, 
11.222305099453026, 20.683233791026705, 116.4104410355262, 10.442316782461202, 
17.55739236549225, 15.39058718709416, 13.147976632511083, 18.966660382670902], 
"eval_len": [44, 12, 13, 21, 89, 12, 22, 17, 17, 21]}

 52%|█████▏    | 519999/1000000 [9:06:40<6:14:09, 21.38it/s]global step 520000, trans_decision ep_re 35.47493804547112

{"global_step": 520000, "eval_re": [8.9293088396422, 6.681469210681642, 
23.389183638356663, 11.249416698621637, 84.48463622304514, 13.239802870971948, 
7.7011247409666135, 25.46629162117861, 22.953296916916983, 150.65484969432976], 
"eval_len": [13, 10, 31, 16, 62, 19, 10, 30, 27, 73]}

 53%|█████▎    | 529999/1000000 [9:17:10<6:08:23, 21.26it/s]global step 530000, trans_decision ep_re 17.61138993962519

{"global_step": 530000, "eval_re": [16.175439167212136, 17.956188177313567, 
20.056221581427156, 11.764510705468384, 31.897828626273757, 11.866918841866186, 
18.802742157367806, 15.437340115266805, 14.785098202295845, 17.37161182176028], 
"eval_len": [17, 23, 23, 15, 26, 16, 21, 24, 31, 28]}

 54%|█████▍    | 539999/1000000 [9:27:40<6:08:27, 20.81it/s]global step 540000, trans_decision ep_re 49.23040328604173

{"global_step": 540000, "eval_re": [12.446149031626863, 10.220742784696219, 
16.702455144339087, 22.905759816681257, 166.49699300700294, 12.14862315793183, 
9.556358644808421, 10.084102968937566, 222.62458047563996, 9.118267828753108], 
"eval_len": [16, 17, 23, 26, 89, 20, 14, 12, 112, 18]}

 55%|█████▍    | 549999/1000000 [9:38:10<5:53:30, 21.22it/s]global step 550000, trans_decision ep_re 39.28464767189656

{"global_step": 550000, "eval_re": [11.727198056684957, 8.93983927655866, 
10.542553606543365, 66.18921864022398, 14.341110174882019, 236.31181692749934, 
9.21573907806667, 5.574995570837747, 9.186715467471481, 20.817289920197446], 
"eval_len": [15, 14, 13, 106, 25, 137, 11, 8, 14, 20]}

 56%|█████▌    | 559999/1000000 [9:48:40<5:48:35, 21.04it/s]global step 560000, trans_decision ep_re 33.90119896304285

{"global_step": 560000, "eval_re": [11.702510086123912, 20.317081886030934, 
75.17884200256618, 23.773504508511508, 18.145186436445893, 7.259676185455527, 
130.41333361388587, 15.012609973320238, 26.679564419375282, 10.52968051871313], 
"eval_len": [17, 32, 45, 22, 24, 10, 93, 23, 24, 13]}

 57%|█████▋    | 569999/1000000 [9:59:00<5:40:21, 21.06it/s]global step 570000, trans_decision ep_re 23.054779132492303

{"global_step": 570000, "eval_re": [7.729439814481157, 15.818367535207981, 
8.79817845783374, 20.636422225830568, 7.831650183521954, 127.70783871927169, 
16.62858759134096, 6.1791581127267206, 9.751137227042442, 9.46701145766582], 
"eval_len": [12, 17, 12, 39, 10, 107, 24, 8, 12, 21]}

 58%|█████▊    | 579999/1000000 [10:09:31<5:24:26, 21.58it/s]global step 580000, trans_decision ep_re 18.055803327564107

{"global_step": 580000, "eval_re": [11.376647030901461, 9.35100838163152, 
25.94261554813764, 18.583421084905915, 15.280218493613003, 21.457245448753884, 
32.17168420893711, 16.006833584902736, 9.469537752633427, 20.918821741224377], 
"eval_len": [21, 15, 32, 29, 17, 22, 31, 31, 12, 30]}

 59%|█████▉    | 589999/1000000 [10:20:01<5:25:57, 20.96it/s]global step 590000, trans_decision ep_re 41.073555019430856

{"global_step": 590000, "eval_re": [14.802124119389566, 39.207483110811026, 
40.27015053221429, 101.36384654549406, 108.21405938270475, 9.177676441914546, 
16.1732494414524, 49.42047609940739, 17.657430865477256, 14.44905365544329], 
"eval_len": [15, 31, 36, 70, 64, 14, 23, 39, 17, 19]}

 60%|█████▉    | 599999/1000000 [10:30:50<5:13:12, 21.28it/s]global step 600000, trans_decision ep_re 51.39012257293412

{"global_step": 600000, "eval_re": [14.512965736383745, 17.784021779497728, 
87.167007475394, 13.876659601712829, 9.06992971381921, 18.87159756080077, 
121.1825305038261, 84.6161026654516, 12.985787034283863, 133.8346236581713], 
"eval_len": [26, 23, 60, 16, 13, 28, 90, 65, 23, 72]}

 61%|██████    | 609999/1000000 [10:41:20<5:10:57, 20.90it/s]global step 610000, trans_decision ep_re 14.634716591993811

{"global_step": 610000, "eval_re": [11.896758636379523, 17.583804874502892, 
28.976883588381728, 10.660665883116089, 13.21436922118769, 13.610589591668669, 
18.060753538526175, 7.0928777949172055, 12.662865120357136, 12.587597670901017],
"eval_len": [19, 19, 33, 14, 16, 23, 23, 14, 18, 18]}

 62%|██████▏   | 619999/1000000 [10:51:50<5:00:49, 21.05it/s]global step 620000, trans_decision ep_re 22.470706446385766

{"global_step": 620000, "eval_re": [24.459178126945414, 7.980132833449481, 
15.87246128516713, 10.8887483933356, 19.52989934833355, 8.232418667271222, 
21.48388727674125, 14.926963054839028, 84.80795046007628, 16.525425017698694], 
"eval_len": [23, 11, 23, 13, 19, 11, 22, 24, 78, 22]}

 63%|██████▎   | 629999/1000000 [11:02:20<4:45:03, 21.63it/s]global step 630000, trans_decision ep_re 13.83916978970414

{"global_step": 630000, "eval_re": [20.812927425645974, 9.734429719935825, 
14.496107411266538, 17.46520324660548, 12.252825280552434, 13.769118715609654, 
12.71621139180596, 15.236103253020445, 8.158506878378285, 13.750264574220793], 
"eval_len": [50, 15, 20, 21, 19, 21, 17, 21, 12, 17]}

 64%|██████▍   | 639999/1000000 [11:12:42<4:45:16, 21.03it/s]global step 640000, trans_decision ep_re 15.529531517478498

{"global_step": 640000, "eval_re": [10.830461836781486, 17.940880947286622, 
22.144528485203764, 12.991194966656487, 17.81477396551749, 15.76774392388402, 
9.847992856908776, 25.731698852337487, 7.594545551590416, 14.631493788618439], 
"eval_len": [14, 22, 33, 15, 19, 19, 12, 29, 10, 18]}

 65%|██████▍   | 649999/1000000 [11:23:30<4:35:25, 21.18it/s]global step 650000, trans_decision ep_re 28.565189186983428

{"global_step": 650000, "eval_re": [12.084807863130889, 14.106139617137854, 
7.733319845684531, 74.82274804479871, 28.104353642163616, 9.821437389040186, 
99.58250108542208, 17.561334953975496, 7.55685878386683, 14.278390644614095], 
"eval_len": [19, 15, 13, 54, 31, 17, 83, 20, 14, 17]}

 66%|██████▌   | 659999/1000000 [11:34:10<4:27:17, 21.20it/s]global step 660000, trans_decision ep_re 23.234673427924964

{"global_step": 660000, "eval_re": [14.316013246189286, 10.263794057260434, 
27.833388308056755, 11.432907016608004, 23.548967271193014, 9.231440834942664, 
76.62089727565471, 6.997949144044768, 28.94297509520046, 23.15840203009952], 
"eval_len": [15, 15, 30, 14, 28, 13, 55, 9, 31, 26]}

 67%|██████▋   | 669999/1000000 [11:44:40<4:18:43, 21.26it/s]global step 670000, trans_decision ep_re 42.938018471122504

{"global_step": 670000, "eval_re": [14.546310283904427, 14.976635260462173, 
7.420258258113003, 6.89857084665382, 115.95524675623814, 15.299896666479029, 
178.24637145612607, 23.142619310862827, 13.681147490519109, 39.21312838186644], 
"eval_len": [18, 18, 15, 9, 64, 17, 103, 26, 14, 31]}

 68%|██████▊   | 679999/1000000 [11:55:00<4:08:51, 21.43it/s]global step 680000, trans_decision ep_re 15.209096074235273

{"global_step": 680000, "eval_re": [11.057818450642237, 11.17937321622462, 
19.232634569571037, 16.37235037608911, 15.676569229173841, 12.899601151254936, 
33.62166932314854, 9.279499844502798, 14.981264227918587, 7.790180353827038], 
"eval_len": [18, 13, 29, 19, 26, 15, 40, 16, 18, 14]}

 69%|██████▉   | 689998/1000000 [12:05:50<4:03:05, 21.25it/s]global step 690000, trans_decision ep_re 15.96731520906007

{"global_step": 690000, "eval_re": [13.7281476366631, 19.069992601289403, 
17.437244948992006, 26.86158466660804, 11.727833280452208, 9.063594540323747, 
20.695917572072787, 10.957024586151377, 15.502975562741414, 14.628836695306646],
"eval_len": [29, 19, 24, 38, 16, 13, 24, 13, 18, 16]}

 70%|██████▉   | 699999/1000000 [12:16:20<3:56:46, 21.12it/s]global step 700000, trans_decision ep_re 22.615603955919873

{"global_step": 700000, "eval_re": [15.969307318408088, 10.488646237162818, 
11.810546348386865, 7.1271603559666845, 7.2107452420424805, 22.974209636686723, 
101.57507661807587, 20.477615410800556, 9.85127809933412, 18.67145429233456], 
"eval_len": [22, 17, 13, 11, 12, 20, 81, 24, 31, 29]}

 71%|███████   | 709999/1000000 [12:26:41<3:45:20, 21.45it/s]global step 710000, trans_decision ep_re 35.88507718545056

{"global_step": 710000, "eval_re": [6.947523615161911, 6.987412794421674, 
22.66005197871947, 247.42463334398596, 13.268132778359217, 8.28690204456606, 
9.484426918217281, 11.26497527016198, 19.204471522781805, 13.3222415881302], 
"eval_len": [11, 10, 27, 144, 18, 13, 16, 16, 21, 17]}

 72%|███████▏  | 719999/1000000 [12:37:12<3:40:06, 21.20it/s]global step 720000, trans_decision ep_re 35.67091167319684

{"global_step": 720000, "eval_re": [36.43680997351831, 157.7856664401968, 
9.398749408902065, 10.6737405814727, 31.323549884960787, 19.119357248961514, 
15.982560267199629, 15.459336307486568, 11.76786315945867, 48.76148345981135], 
"eval_len": [51, 105, 18, 13, 27, 18, 20, 15, 13, 64]}

 73%|███████▎  | 729998/1000000 [12:48:00<3:30:36, 21.37it/s]global step 730000, trans_decision ep_re 40.055227425686084

{"global_step": 730000, "eval_re": [23.36086861150316, 71.37564728243396, 
130.70087288663385, 14.165912855925011, 66.80774767776037, 10.076095347050819, 
24.126530880217373, 7.75810649116439, 38.03901310073689, 14.141479123434994], 
"eval_len": [35, 47, 78, 32, 60, 15, 32, 12, 39, 18]}

 74%|███████▍  | 739998/1000000 [12:58:30<3:22:09, 21.44it/s]global step 740000, trans_decision ep_re 21.481538480332343

{"global_step": 740000, "eval_re": [9.28440332182712, 13.272341952667821, 
24.676844577123095, 10.601736522574084, 11.44768402801149, 15.624325574379684, 
13.835224980310617, 14.315520423141553, 18.19728785855919, 83.56001556472879], 
"eval_len": [13, 33, 22, 14, 25, 17, 13, 19, 21, 77]}

 75%|███████▍  | 749998/1000000 [13:09:00<3:18:35, 20.98it/s]global step 750000, trans_decision ep_re 17.595148231403403

{"global_step": 750000, "eval_re": [11.060696932017928, 7.062856591679628, 
22.23225097986061, 10.654450414848897, 29.759982063477825, 11.603357713740916, 
12.567819802775915, 46.47621256570827, 8.864942424539873, 15.66891282538417], 
"eval_len": [23, 10, 35, 15, 31, 17, 16, 34, 24, 31]}

 76%|███████▌  | 759997/1000000 [13:19:30<3:12:28, 20.78it/s]global step 760000, trans_decision ep_re 30.852268996976925

{"global_step": 760000, "eval_re": [11.243205919239754, 17.48200449924582, 
7.424344229374866, 20.03238740749807, 74.06371676933513, 9.382884392206524, 
17.469882281382805, 18.544566412517888, 124.41366148504179, 8.466036573926578], 
"eval_len": [14, 18, 12, 35, 44, 13, 24, 27, 91, 11]}

 77%|███████▋  | 769999/1000000 [13:30:00<3:02:57, 20.95it/s]global step 770000, trans_decision ep_re 12.962778396697777

{"global_step": 770000, "eval_re": [11.893728888316735, 10.057639537954561, 
9.582326073085696, 19.934292596234705, 12.14064576696493, 21.24710232178645, 
11.611054124801953, 12.511147264265988, 11.183242033345744, 9.466605360221022], 
"eval_len": [16, 15, 13, 25, 23, 19, 16, 16, 19, 12]}

 78%|███████▊  | 779999/1000000 [13:40:30<2:53:08, 21.18it/s]global step 780000, trans_decision ep_re 41.09807331210656

{"global_step": 780000, "eval_re": [132.9807097770404, 16.248946312176443, 
9.70682772031108, 19.490362970113043, 11.33080436492135, 38.032648801253764, 
112.27714248041315, 25.078655582859277, 35.21957387756314, 10.615061234413854], 
"eval_len": [95, 19, 15, 23, 21, 35, 87, 29, 33, 20]}

 79%|███████▉  | 789999/1000000 [13:50:53<2:44:24, 21.29it/s]global step 790000, trans_decision ep_re 35.96283863436089

{"global_step": 790000, "eval_re": [15.75002318990574, 10.924338630468396, 
16.46202809972836, 18.273491152588512, 164.7158655406511, 75.81937781588506, 
13.20057191528332, 12.133920118070717, 12.461753037487323, 19.887016843540398], 
"eval_len": [17, 13, 22, 20, 128, 48, 33, 14, 15, 22]}

 80%|███████▉  | 799999/1000000 [14:01:24<2:38:44, 21.00it/s]global step 800000, trans_decision ep_re 13.474236133262536

{"global_step": 800000, "eval_re": [15.42836395667387, 8.306010926645593, 
15.330839298087769, 16.550926716309302, 9.429189047792503, 8.108361327441383, 
10.449942144088975, 10.67426318423329, 12.792533143858867, 27.671931587493816], 
"eval_len": [19, 12, 16, 21, 18, 19, 18, 19, 14, 26]}

 81%|████████  | 809998/1000000 [14:12:10<2:27:58, 21.40it/s]global step 810000, trans_decision ep_re 16.763087557599302

{"global_step": 810000, "eval_re": [9.975651331842604, 12.7118989460486, 
29.31580256520747, 43.229369276963105, 19.421356410108597, 12.255462435408175, 
9.528449304342846, 12.433931705188579, 11.298180175812542, 7.460773425070492], 
"eval_len": [12, 14, 35, 43, 18, 23, 36, 28, 14, 11]}

 82%|████████▏ | 819999/1000000 [14:22:40<2:21:11, 21.25it/s]global step 820000, trans_decision ep_re 13.924287291307744

{"global_step": 820000, "eval_re": [12.677443304621285, 7.570852615145928, 
18.07193346377055, 10.94949712536353, 12.264827129656117, 11.444336406257529, 
11.280361004249691, 14.596346544565002, 23.345087176139174, 17.042188143308646],
"eval_len": [13, 15, 34, 15, 26, 13, 16, 15, 22, 29]}

 83%|████████▎ | 829999/1000000 [14:33:11<2:14:21, 21.09it/s]global step 830000, trans_decision ep_re 59.120497452151696

{"global_step": 830000, "eval_re": [91.29114706535907, 19.582016873938176, 
179.34319205190167, 17.89353454346924, 164.0145817881832, 11.446861923497188, 
21.66515560853255, 34.24111862806592, 18.465685582319015, 33.26168045625102], 
"eval_len": [76, 22, 160, 33, 83, 19, 20, 27, 24, 30]}

 84%|████████▍ | 839999/1000000 [14:43:33<2:05:37, 21.23it/s]global step 840000, trans_decision ep_re 20.762103376059294

{"global_step": 840000, "eval_re": [72.6293968969243, 20.580630637064143, 
8.441377576594347, 16.034297289181897, 10.698588570067905, 30.08903584154221, 
13.615787720213678, 11.307878339100307, 8.484342102297695, 15.739698787606432], 
"eval_len": [57, 20, 12, 17, 20, 26, 18, 15, 11, 21]}

 85%|████████▍ | 849998/1000000 [14:54:21<1:57:43, 21.24it/s]global step 850000, trans_decision ep_re 29.718447579882813

{"global_step": 850000, "eval_re": [11.047774082348832, 13.366166291705945, 
14.76960027708457, 12.225299475879646, 14.162640146502671, 10.608651486930285, 
71.82471555359714, 20.229390486701288, 114.20190777558969, 14.748330222488027], 
"eval_len": [15, 16, 18, 22, 16, 13, 67, 19, 70, 18]}

 86%|████████▌ | 859997/1000000 [15:04:51<1:49:00, 21.41it/s]global step 860000, trans_decision ep_re 14.594802106650684

{"global_step": 860000, "eval_re": [10.859630344651224, 15.366028038083892, 
24.435002605482357, 13.664461060432629, 8.773211783967925, 8.791004694662996, 
20.440690377548194, 14.910232643233723, 17.656203991206116, 11.051555527237781],
"eval_len": [12, 18, 51, 15, 18, 15, 21, 20, 19, 25]}

 87%|████████▋ | 869998/1000000 [15:15:21<1:43:09, 21.00it/s]global step 870000, trans_decision ep_re 20.762870750135182

{"global_step": 870000, "eval_re": [68.6907952330646, 26.154624583947292, 
23.86022509406405, 12.650714932891146, 10.973691152274771, 22.410227936922556, 
9.513895833069753, 8.042762937635542, 14.81431530560377, 10.517454491878343], 
"eval_len": [41, 31, 33, 14, 17, 28, 15, 10, 15, 14]}

 88%|████████▊ | 879999/1000000 [15:25:31<1:34:35, 21.14it/s]global step 880000, trans_decision ep_re 16.88015113323856

{"global_step": 880000, "eval_re": [12.254882116021463, 11.546411092193113, 
16.102751790490586, 15.857457309457638, 13.08336090443884, 10.510562031310563, 
34.51205528610806, 19.71463336152379, 18.5482305182067, 16.67116692263483], 
"eval_len": [25, 18, 17, 16, 18, 15, 30, 28, 20, 19]}

 89%|████████▉ | 889999/1000000 [15:36:11<1:25:16, 21.50it/s]global step 890000, trans_decision ep_re 32.44669110343909

{"global_step": 890000, "eval_re": [15.410110860624004, 142.08079967930294, 
13.825958784574444, 17.8316677101289, 18.510222303235516, 27.945007237122677, 
32.86244354472963, 16.43076328802171, 11.313790241153923, 28.256147385497158], 
"eval_len": [21, 73, 22, 30, 18, 26, 27, 19, 17, 26]}

 90%|████████▉ | 899998/1000000 [15:46:41<1:18:39, 21.19it/s]global step 900000, trans_decision ep_re 49.9110838999723

{"global_step": 900000, "eval_re": [24.30916679742675, 15.510363141557344, 
20.40503318814742, 6.70888439246463, 133.44302225373576, 65.3940235676581, 
11.734181003636712, 80.6751366384191, 129.53354133876502, 11.39748667791207], 
"eval_len": [22, 17, 21, 9, 86, 49, 13, 56, 138, 17]}

 91%|█████████ | 909999/1000000 [15:56:53<1:10:35, 21.25it/s]global step 910000, trans_decision ep_re 17.527385604299717

{"global_step": 910000, "eval_re": [24.879872598507756, 20.972122337163917, 
8.063771768552215, 8.25876794434323, 7.682501194422572, 21.501482185461814, 
12.35569563327626, 33.31925310512491, 17.31358596587244, 20.92680331027206], 
"eval_len": [27, 23, 11, 13, 13, 20, 17, 27, 18, 28]}

 92%|█████████▏| 919999/1000000 [16:07:31<1:01:57, 21.52it/s]global step 920000, trans_decision ep_re 27.48771786367314

{"global_step": 920000, "eval_re": [12.719044275718113, 151.38530962546474, 
11.579177235853475, 10.339972900726295, 22.59765680543323, 23.502176967940336, 
8.959462148396957, 13.40609229144728, 8.016060814648169, 12.37222557110281], 
"eval_len": [18, 104, 13, 32, 36, 34, 22, 30, 15, 19]}

 93%|█████████▎| 929999/1000000 [16:18:01<54:41, 21.33it/s]global step 930000, trans_decision ep_re 15.870183742709774

{"global_step": 930000, "eval_re": [17.52744456107045, 16.47164208917979, 
9.233142613534124, 10.453705256811991, 21.048502658070987, 9.388959954627746, 
30.257529040029937, 7.7810657353029695, 15.787175712322357, 20.752669806147406],
"eval_len": [18, 25, 12, 15, 24, 15, 27, 11, 24, 24]}

 94%|█████████▍| 939998/1000000 [16:28:31<46:16, 21.61it/s]global step 940000, trans_decision ep_re 21.833539602209104

{"global_step": 940000, "eval_re": [10.963410707532748, 14.265231531262343, 
10.880278314868042, 122.2507600445619, 8.5992693698825, 8.027495857270578, 
11.116811220684951, 14.068614959086085, 8.927181759645874, 9.23634225729602], 
"eval_len": [13, 26, 13, 67, 13, 12, 13, 16, 11, 13]}

 95%|█████████▍| 949999/1000000 [16:38:41<39:15, 21.22it/s]global step 950000, trans_decision ep_re 30.194321517581727

{"global_step": 950000, "eval_re": [5.270047399684627, 110.3878032752551, 
13.14800531360935, 21.413753168304332, 13.582061081211124, 89.20917768267185, 
10.335108043623096, 15.486335799715055, 13.428170083008093, 9.682753328734666], 
"eval_len": [8, 74, 16, 25, 21, 124, 15, 21, 14, 29]}

 96%|█████████▌| 959999/1000000 [16:49:21<31:11, 21.38it/s]global step 960000, trans_decision ep_re 15.469744342230877

{"global_step": 960000, "eval_re": [10.409769084491, 19.085220748216315, 
19.3814775272764, 17.892823705989542, 18.100085483317486, 21.343012314878937, 
8.299216236875731, 16.085121671024492, 7.683236296338762, 16.41748035390009], 
"eval_len": [14, 23, 23, 28, 34, 31, 23, 21, 10, 20]}

 97%|█████████▋| 969998/1000000 [16:59:51<23:14, 21.51it/s]global step 970000, trans_decision ep_re 17.9256080774644

{"global_step": 970000, "eval_re": [11.909776357548349, 16.65745367547052, 
8.083967207511261, 13.397022489304677, 27.7239616304002, 25.580470756529994, 
16.617088892069177, 14.479912630994544, 11.442111208819007, 33.364315925996266],
"eval_len": [21, 17, 13, 17, 34, 23, 17, 21, 34, 31]}

 98%|█████████▊| 979999/1000000 [17:10:03<15:43, 21.21it/s]global step 980000, trans_decision ep_re 20.961687803221828

{"global_step": 980000, "eval_re": [8.795105532576738, 21.475608859275415, 
19.0338383263293, 11.58322708830346, 12.190314372490894, 14.562494930718096, 
66.51932243118542, 9.117073354108872, 33.14079376000981, 13.199099377220287], 
"eval_len": [33, 24, 28, 17, 18, 17, 49, 13, 28, 14]}

 99%|█████████▉| 989999/1000000 [17:20:31<07:49, 21.31it/s]global step 990000, trans_decision ep_re 21.511058143302666

{"global_step": 990000, "eval_re": [28.939930675769478, 23.925249993127384, 
9.55357391988055, 52.00844001080335, 16.800536459948347, 9.538689952428758, 
13.276396065904912, 17.296606318446006, 28.316795527929123, 15.454362508788765],
"eval_len": [25, 22, 12, 51, 22, 16, 41, 18, 32, 19]}

100%|█████████▉| 999999/1000000 [17:31:11<00:00, 21.08it/s]global step 1000000, trans_decision ep_re 27.072219484113383

{"global_step": 1000000, "eval_re": [11.357769225810499, 39.57149297709356, 
19.243171185970365, 72.19275390580191, 23.74748091742822, 24.392355244373732, 
7.590161736445251, 25.037780516715035, 31.88026039044592, 15.708968741049372], 
"eval_len": [13, 53, 27, 55, 21, 22, 11, 21, 28, 20]}

100%|██████████| 1000000/1000000 [17:31:17<00:00, 15.85it/s]
