
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.15
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9999/1000000 [03:40<8:40:02, 31.73it/s]global step 10000, trans_decision ep_re 144.86276256029163

{"global_step": 10000, "eval_re": [10.809694604123953, 3.4387543475894167, 
204.120971466634, 232.8746932498973, 6.575717819760585, 622.4008657233613, 
34.89960293642334, 304.41851964937064, 14.629004326367994, 14.459801479387822], 
"eval_len": [20, 18, 110, 116, 20, 586, 118, 167, 22, 22]}

  2%|▏         | 19999/1000000 [10:21<8:37:52, 31.54it/s]global step 20000, trans_decision ep_re 128.51818298595563

{"global_step": 20000, "eval_re": [206.93891142518265, 299.2653089236389, 
219.79047205881307, 204.4512256586044, 14.843839162256728, 8.951613891445913, 
11.704328949173243, 2.7866822810520024, 9.22247443176232, 307.226973077627], 
"eval_len": [320, 230, 249, 128, 25, 20, 21, 13, 19, 217]}

  3%|▎         | 29997/1000000 [17:15<8:23:57, 32.08it/s]global step 30000, trans_decision ep_re 56.37292877021859

{"global_step": 30000, "eval_re": [90.46426567192044, 5.12972116711313, 
9.004071442655789, 215.62158316214777, 8.546469862575293, 18.974367231706122, 
195.9545410382991, 7.627836585402498, 7.601130280382473, 4.805301259983342], 
"eval_len": [93, 17, 20, 131, 22, 50, 119, 20, 22, 17]}

  4%|▍         | 39997/1000000 [24:06<8:26:13, 31.61it/s]global step 40000, trans_decision ep_re 11.086373474522246

{"global_step": 40000, "eval_re": [6.393067772802112, 7.524105756137629, 
11.580506744193203, 17.86537751629246, 7.5044745624519695, 24.256352975770547, 
9.748419166003211, 12.730696216558103, 2.6435981958118444, 10.617135839201373], 
"eval_len": [23, 20, 22, 29, 23, 73, 21, 25, 12, 22]}

  5%|▍         | 49997/1000000 [30:57<8:33:35, 30.83it/s]global step 50000, trans_decision ep_re 30.720846026588845

{"global_step": 50000, "eval_re": [4.890209306221344, 17.194010544110697, 
13.333455603651519, 63.86558561697884, 10.559889495359203, 12.763273446559381, 
168.77957504541385, 5.071225406780823, 5.027993030849265, 5.7232427699635], 
"eval_len": [23, 33, 24, 82, 21, 26, 154, 17, 21, 23]}

  6%|▌         | 59997/1000000 [38:00<8:30:04, 30.71it/s]global step 60000, trans_decision ep_re 164.58081317800705

{"global_step": 60000, "eval_re": [420.63170582048383, 8.268943885347573, 
3.8842326473499376, 547.9930348176309, 10.084750901525732, 262.13824982141665, 
4.02281370664485, 11.047087033692273, 375.65597909431125, 2.0813340516671364], 
"eval_len": [273, 19, 19, 292, 22, 130, 18, 22, 162, 22]}

  7%|▋         | 69997/1000000 [44:43<8:11:37, 31.53it/s]global step 70000, trans_decision ep_re 196.78721400813475

{"global_step": 70000, "eval_re": [735.717418667242, 57.823604951661416, 
8.480013027455312, 3.5941246132553606, 11.21109616871785, 6.71240673112563, 
659.7360844240566, 7.615137871076941, 7.262810949795101, 469.71944267696136], 
"eval_len": [313, 73, 23, 19, 21, 24, 266, 21, 20, 259]}

  8%|▊         | 79997/1000000 [51:50<8:06:01, 31.55it/s]global step 80000, trans_decision ep_re 129.38732473992303

{"global_step": 80000, "eval_re": [6.430669659887576, 218.41499133961162, 
586.6242619022927, 9.105200404359438, 17.616873736079334, 235.36070400640418, 
127.88846996867542, 3.999691772142733, 80.85736472993793, 7.575019879839413], 
"eval_len": [21, 136, 266, 22, 38, 133, 114, 16, 105, 22]}

  9%|▉         | 89997/1000000 [58:32<8:07:47, 31.09it/s]global step 90000, trans_decision ep_re 42.77173272462077

{"global_step": 90000, "eval_re": [5.845178019879546, 0.44314706161157136, 
4.155246048909663, 7.2564572061824695, 12.849670197704896, 6.1477941358062855, 
209.05984900214938, 11.8348315429446, 8.72939876589227, 161.39575526512698], 
"eval_len": [18, 20, 21, 24, 22, 20, 113, 21, 22, 146]}

 10%|▉         | 99997/1000000 [1:05:24<7:59:39, 31.27it/s]global step 100000, trans_decision ep_re 210.89606448005165

{"global_step": 100000, "eval_re": [626.2894674079531, 37.69352438461142, 
423.8136685047166, 5.695628519023075, 8.144455111525609, 200.57195631667688, 
410.17820453235225, 8.493094650814971, 4.408921339448945, 383.6717240333939], 
"eval_len": [224, 39, 204, 19, 20, 107, 174, 20, 19, 153]}

 11%|█         | 109999/1000000 [1:12:30<7:41:03, 32.17it/s]global step 110000, trans_decision ep_re 281.83178345384334

{"global_step": 110000, "eval_re": [8.211847214744864, 14.925879637000259, 
5.428081215177688, 9.848705023652817, 1.165185838639179, 1132.1367477156193, 
1095.863726560725, 197.00605146425593, 345.0224765536118, 8.709133315006488], 
"eval_len": [20, 23, 18, 23, 13, 453, 449, 189, 194, 20]}

 12%|█▏        | 119997/1000000 [1:19:12<7:40:45, 31.83it/s]global step 120000, trans_decision ep_re 174.5105309993094

{"global_step": 120000, "eval_re": [2.491738347720798, 10.961251164861615, 
1.1046424114003626, 10.120920554407965, 6.937028469464123, 32.351655103073725, 
5.689014287635047, 9.948918641007019, 1658.6252658486721, 6.874875164851065], 
"eval_len": [23, 23, 13, 22, 18, 36, 18, 19, 592, 22]}

 13%|█▎        | 129999/1000000 [1:26:05<7:44:45, 31.20it/s]global step 130000, trans_decision ep_re 292.8263247845371

{"global_step": 130000, "eval_re": [1176.4682842239731, 6.3543774738822405, 
4.281093605257198, 161.36718640770826, 25.961380072483355, 1110.991571561101, 
6.7517301276331025, 9.898736814493114, 10.981201629610887, 415.20768592922883], 
"eval_len": [450, 19, 18, 80, 34, 434, 19, 22, 20, 181]}

 14%|█▍        | 139999/1000000 [1:33:10<7:36:01, 31.43it/s]global step 140000, trans_decision ep_re 249.4666871357935

{"global_step": 140000, "eval_re": [5.937797040650476, 4.69421928782052, 
658.1295263909875, 933.0557536707669, 12.1627793991616, 4.265030865890868, 
9.45482091965727, 12.715730547870983, 849.1753239333633, 5.075889301765517], 
"eval_len": [18, 19, 255, 337, 24, 19, 21, 22, 297, 15]}

 15%|█▍        | 149997/1000000 [1:39:52<7:28:06, 31.61it/s]global step 150000, trans_decision ep_re 300.3178447842091

{"global_step": 150000, "eval_re": [10.434279530668547, 447.7993615387378, 
1018.1239892912301, 7.691625582639435, 10.557181235619993, 1.5446048768929022, 
3.575998072243419, 689.3185050635693, 12.762030974955174, 801.3708716755352], 
"eval_len": [23, 197, 393, 18, 23, 20, 17, 297, 21, 314]}

 16%|█▌        | 159999/1000000 [1:46:47<7:26:00, 31.39it/s]global step 160000, trans_decision ep_re 88.92794544347763

{"global_step": 160000, "eval_re": [0.5736477141863263, 525.16684913051, 
10.770966965105522, 9.976088262780692, 9.073665461066756, 10.935677318325492, 
-0.8698414339625947, 134.08117391712554, 185.5356852371975, 4.035541862440942], 
"eval_len": [21, 218, 22, 20, 20, 23, 22, 89, 102, 15]}

 17%|█▋        | 169999/1000000 [1:53:40<7:12:33, 31.98it/s]global step 170000, trans_decision ep_re 291.9392232226437

{"global_step": 170000, "eval_re": [718.6410272965259, 156.37319796795782, 
5.633118996554189, 310.3873207313389, 524.1895364834893, 7.87961130584988, 
3.119663379128159, 0.6427138964306434, 10.676620635327986, 1181.8494215338346], 
"eval_len": [311, 88, 20, 149, 217, 22, 19, 13, 23, 451]}

 18%|█▊        | 179997/1000000 [2:00:35<7:25:29, 30.68it/s]global step 180000, trans_decision ep_re 196.45877444005654

{"global_step": 180000, "eval_re": [91.06837527371943, 13.619223564900011, 
235.40858389241674, 3.3708360970304287, 14.717768999168326, 12.785669446916163, 
11.552119373145489, 10.94587625250315, 1564.6473521597184, 6.471939341047393], 
"eval_len": [73, 24, 110, 20, 23, 23, 23, 22, 580, 19]}

 19%|█▉        | 189999/1000000 [2:07:28<7:10:52, 31.33it/s]global step 190000, trans_decision ep_re 25.70922361458483

{"global_step": 190000, "eval_re": [8.785372461190875, 18.249361304025484, 
113.91334510819412, 12.909024107926786, 2.7703078975381206, 6.699544234044116, 
51.0906395901853, 5.494359433568066, 3.8861935298249146, 33.294088479350506], 
"eval_len": [21, 31, 73, 23, 16, 22, 63, 20, 21, 47]}

 20%|█▉        | 199999/1000000 [2:14:30<6:51:30, 32.40it/s]global step 200000, trans_decision ep_re 169.50399729327958

{"global_step": 200000, "eval_re": [9.000067023789423, 7.640117259673841, 
10.591744443809292, 231.5100429129512, 1393.6575905682857, 8.953537005066558, 
6.789193342229104, 10.296607056040632, 6.383213022575276, 10.217860298374474], 
"eval_len": [18, 19, 24, 141, 548, 22, 20, 19, 19, 22]}

 21%|██        | 209999/1000000 [2:21:11<7:01:16, 31.25it/s]global step 210000, trans_decision ep_re 239.0159018274081

{"global_step": 210000, "eval_re": [12.768566056513995, 10.982279962932001, 
15.346491299478181, 6.61695321304129, 39.43006552474445, 12.073153988908743, 
729.0283853515198, 905.4094371225403, 11.000447611396257, 647.5032381430059], 
"eval_len": [22, 21, 32, 18, 83, 22, 294, 332, 22, 286]}

 22%|██▏       | 219997/1000000 [2:28:05<6:47:11, 31.93it/s]global step 220000, trans_decision ep_re 113.32999266637482

{"global_step": 220000, "eval_re": [168.34466399873102, 6.3741257445435995, 
11.326551775034797, 619.0183395890014, 40.39299714459413, 215.30676517042457, 
25.540094440396253, 13.479282840382599, 27.366141030918012, 6.15096492972201], 
"eval_len": [95, 16, 23, 289, 47, 165, 34, 34, 35, 16]}

 23%|██▎       | 229997/1000000 [2:34:59<6:50:14, 31.28it/s]global step 230000, trans_decision ep_re 30.33400970712442

{"global_step": 230000, "eval_re": [154.51450683078627, 6.5638879254705005, 
5.3370916274987215, 5.585064200402869, 8.13095194499924, 6.3203115960636165, 
3.0221296754303966, 101.56345881024029, 3.6673453550772135, 8.635349105275107], 
"eval_len": [99, 17, 18, 18, 18, 19, 16, 86, 17, 23]}

 24%|██▍       | 239997/1000000 [2:42:00<6:36:27, 31.95it/s]global step 240000, trans_decision ep_re 337.4405507816849

{"global_step": 240000, "eval_re": [40.635104336971786, 9.483818509661097, 
9.399787272515017, 714.7386376254884, 556.1470836608121, 7.109479687428518, 
6.780764580715191, 8.996639962855522, 2013.4066078421242, 7.70758433827657], 
"eval_len": [43, 22, 20, 255, 218, 17, 22, 19, 698, 21]}

 25%|██▍       | 249997/1000000 [2:49:00<6:30:54, 31.98it/s]global step 250000, trans_decision ep_re 604.8008749628386

{"global_step": 250000, "eval_re": [4.211886427978257, 6.312604406374127, 
8.192619719458959, 2369.8190075310686, 31.094866966303837, 34.84605292588124, 
23.728784721486683, 29.168295208892957, 1506.4647648284526, 2034.169866892489], 
"eval_len": [17, 19, 21, 836, 35, 37, 38, 37, 480, 699]}

 26%|██▌       | 259997/1000000 [2:55:42<6:36:37, 31.10it/s]global step 260000, trans_decision ep_re 106.90780592442259

{"global_step": 260000, "eval_re": [7.460691719840371, 12.447627194694102, 
23.38144759135734, 26.63894790650227, 9.448803912686284, 25.427392536309554, 
9.705417621505957, 926.2400086415369, 20.955201357560608, 7.372520762232595], 
"eval_len": [17, 21, 35, 35, 21, 35, 22, 337, 35, 19]}

 27%|██▋       | 269997/1000000 [3:02:33<6:18:16, 32.16it/s]global step 270000, trans_decision ep_re 274.5144699137894

{"global_step": 270000, "eval_re": [160.3589662209852, 9.363439675806992, 
157.116426157419, 7.689662593647977, 767.8777117941945, 11.452148058181681, 
0.5019756105462665, 1328.557227868894, 157.1206653352575, 145.1064758229611], 
"eval_len": [108, 22, 100, 19, 354, 21, 13, 453, 101, 92]}

 28%|██▊       | 279999/1000000 [3:09:40<6:25:29, 31.13it/s]global step 280000, trans_decision ep_re 498.5648908265125

{"global_step": 280000, "eval_re": [29.750602382684907, 148.69400207918576, 
61.12289243508185, 1493.6125483104538, 7.191203100578006, 3.932534066583935, 
719.3769136574917, 2510.20275258784, 8.35223720071585, 3.4132224445090538], 
"eval_len": [38, 96, 61, 488, 21, 23, 243, 689, 20, 16]}

 29%|██▉       | 289999/1000000 [3:16:40<6:11:40, 31.84it/s]global step 290000, trans_decision ep_re 1041.0182819680779

{"global_step": 290000, "eval_re": [9.905295606339045, 3392.7653338391765, 
115.02562822781191, 10.313424012871549, 787.094069784084, 9.336484933945906, 
2161.5631483005855, 1886.1358072891292, 7.094663107229091, 2030.9489645796054], 
"eval_len": [20, 1000, 86, 20, 271, 20, 667, 593, 17, 644]}

 30%|██▉       | 299997/1000000 [3:23:24<6:06:57, 31.79it/s]global step 300000, trans_decision ep_re 296.7802430920392

{"global_step": 300000, "eval_re": [211.3143461015681, 4.867195217216187, 
6.267730087003626, 1876.1454860238173, 10.228279861117006, 745.371348860551, 
8.621927684951517, 1.638185000686132, 92.17739044375071, 11.170541639730903], 
"eval_len": [127, 16, 23, 555, 23, 282, 20, 19, 100, 20]}

 31%|███       | 309999/1000000 [3:30:18<6:06:07, 31.41it/s]global step 310000, trans_decision ep_re 10.203570027877328

{"global_step": 310000, "eval_re": [5.495897059721618, 12.850955943277704, 
7.526695638813163, 6.632078054477318, 12.662955569212643, 6.869354192037505, 
23.64676129764141, 3.967273201292481, 10.795847112056547, 11.587882210242899], 
"eval_len": [16, 23, 22, 21, 24, 19, 33, 15, 24, 23]}

 32%|███▏      | 319999/1000000 [3:37:20<6:01:02, 31.39it/s]global step 320000, trans_decision ep_re 730.4996849384104

{"global_step": 320000, "eval_re": [5.2772890342233145, 1083.7354255035234, 
1901.6390335837498, 5.9819190829827225, 1.1242851065829322, 3620.263798375673, 
639.0031021105034, 16.090174914988484, 29.25843551758657, 2.6233861542895065], 
"eval_len": [18, 352, 604, 21, 19, 1000, 272, 30, 38, 17]}

 33%|███▎      | 329997/1000000 [3:44:20<5:50:36, 31.85it/s]global step 330000, trans_decision ep_re 574.0783946195472

{"global_step": 330000, "eval_re": [1693.425750071789, 9.383804351963184, 
5.74625453251512, 9.747102416773108, 107.92290328587339, 58.36923310452266, 
2742.8222068436826, 8.769929411267661, 4.22686973138366, 1100.3698924457017], 
"eval_len": [489, 24, 15, 20, 77, 62, 796, 22, 16, 380]}

 34%|███▍      | 339997/1000000 [3:51:03<5:58:30, 30.68it/s]global step 340000, trans_decision ep_re 75.51271848517206

{"global_step": 340000, "eval_re": [2.0545485549699465, 150.04875394382555, 
12.023632024470139, 4.850186741387464, 19.850450516202123, 7.063589301434037, 
7.237298065731824, 539.3753565072109, 3.69641480319064, 8.926954393297912], 
"eval_len": [13, 84, 24, 17, 43, 19, 21, 216, 21, 22]}

 35%|███▍      | 349997/1000000 [3:57:55<5:42:10, 31.66it/s]global step 350000, trans_decision ep_re 79.80602290963606

{"global_step": 350000, "eval_re": [27.005572428224127, 20.09721778002267, 
123.46131785491531, 475.8348877253184, 34.92433728792715, 66.05438409602357, 
9.200861411297183, 31.040012275507923, 7.024505573444831, 3.417132663679469], 
"eval_len": [41, 33, 69, 201, 46, 80, 20, 38, 18, 19]}

 36%|███▌      | 359999/1000000 [4:04:48<5:41:39, 31.22it/s]global step 360000, trans_decision ep_re 20.219414922234648

{"global_step": 360000, "eval_re": [7.794776720936684, 28.37575144161148, 
36.60357715445706, 3.48067349570622, 10.194383626420525, 0.4363246211267826, 
63.434392060859274, 6.762480015950056, 38.8190464679241, 6.292743617354323], 
"eval_len": [21, 42, 41, 16, 21, 19, 63, 17, 40, 19]}

 37%|███▋      | 369999/1000000 [4:11:50<5:40:20, 30.85it/s]global step 370000, trans_decision ep_re 292.72984149561955

{"global_step": 370000, "eval_re": [5.61659448257244, 4.371961028718853, 
86.92894436892594, 1507.9972995121805, 4.947404176576901, 9.339717680689462, 
10.12434605046556, 30.87385541423404, 4.432607569046965, 1262.6656846727851], 
"eval_len": [19, 17, 72, 504, 16, 21, 21, 38, 16, 408]}

 38%|███▊      | 379997/1000000 [4:18:31<5:26:12, 31.68it/s]global step 380000, trans_decision ep_re 263.6799264353119

{"global_step": 380000, "eval_re": [108.25974257086749, 186.34464529850155, 
5.6968322885455756, 99.46579624392334, 2154.2278165126827, 6.094053783573313, 
3.2222946861843624, 4.119441369714314, 57.942804897326695, 11.425836701799879], 
"eval_len": [85, 102, 20, 84, 594, 17, 17, 22, 58, 20]}

 39%|███▉      | 389999/1000000 [4:25:26<5:21:43, 31.60it/s]global step 390000, trans_decision ep_re 71.33636210105959

{"global_step": 390000, "eval_re": [13.373465531703381, 116.70759006191679, 
212.2747614723903, 6.338843837436824, 22.85670225138766, 10.28678768194639, 
178.88233149089075, 9.722058692539854, 3.9217340209439495, 138.99934596944], 
"eval_len": [34, 77, 121, 20, 36, 22, 125, 20, 16, 89]}

 40%|███▉      | 399997/1000000 [4:32:30<5:30:34, 30.25it/s]global step 400000, trans_decision ep_re 109.84401120669231

{"global_step": 400000, "eval_re": [10.865186445237695, 6.31058005177917, 
7.887922872282762, 0.8000610594324928, 5.323653782771233, 5.088791592576767, 
7.084835007729056, 9.351218752400328, 154.06013234144453, 891.667730161269], 
"eval_len": [23, 20, 19, 13, 17, 16, 19, 19, 106, 323]}

 41%|████      | 409999/1000000 [4:39:11<5:14:41, 31.25it/s]global step 410000, trans_decision ep_re 402.5424734687678

{"global_step": 410000, "eval_re": [1881.7385835135633, 8.755137881045508, 
3.9618602305803843, 1849.193872004712, 185.32418751295424, 10.205894502081843, 
34.872142810583895, 39.20751340718097, 5.547450405965618, 6.61809241901017], 
"eval_len": [495, 22, 21, 545, 113, 20, 39, 51, 17, 19]}

 42%|████▏     | 419997/1000000 [4:46:06<5:05:35, 31.63it/s]global step 420000, trans_decision ep_re 255.3427939567604

{"global_step": 420000, "eval_re": [9.5278978531458, 692.901496937026, 
74.379965425907, 829.1383263149553, 196.39619263194487, 7.7801652659543645, 
355.41752470888076, 8.649883202839563, 31.81787519942658, 347.41861202752403], 
"eval_len": [23, 251, 79, 355, 97, 23, 152, 20, 41, 152]}

 43%|████▎     | 429997/1000000 [4:53:01<5:01:25, 31.52it/s]global step 430000, trans_decision ep_re 159.39225907698813

{"global_step": 430000, "eval_re": [244.54509670784338, 151.02883244505594, 
1.541926497289645, 210.84268207195828, 64.11182286095537, 5.0864224455019, 
6.025146852532808, 483.70586937212585, 424.4794343812791, 2.555357135339221], 
"eval_len": [154, 83, 16, 110, 70, 18, 16, 192, 199, 17]}

 44%|████▍     | 439999/1000000 [4:59:52<4:48:29, 32.35it/s]global step 440000, trans_decision ep_re 554.2408014100677

{"global_step": 440000, "eval_re": [2527.522908976818, 13.396270519126112, 
3.006683115745997, 568.5644021551332, -1.5472102716154719, 1538.9993321411546, 
6.788909547339326, 5.989117718700014, 873.0518781766207, 6.6357220216539226], 
"eval_len": [640, 22, 20, 229, 17, 435, 21, 19, 278, 21]}

 45%|████▍     | 449997/1000000 [5:07:00<4:53:16, 31.26it/s]global step 450000, trans_decision ep_re 821.6657956463366

{"global_step": 450000, "eval_re": [234.7660452693684, 2538.434298491653, 
5.697804130451813, 221.5674469738773, 3827.4288696683334, 9.311581228694502, 
140.59766435928142, 9.671250047820761, 521.8982610958826, 707.2847351980027], 
"eval_len": [144, 671, 19, 124, 1000, 18, 99, 19, 171, 240]}

 46%|████▌     | 459997/1000000 [5:14:00<4:54:00, 30.61it/s]global step 460000, trans_decision ep_re 851.787630951274

{"global_step": 460000, "eval_re": [1882.6052024230607, 1660.2139975108773, 
152.35732611774185, 1282.8419114313351, 3.7582530973706416, 13.335825037327913, 
194.99506658571636, 3.6994707513340597, 225.587997663948, 3098.4812588940267], 
"eval_len": [525, 486, 97, 354, 17, 35, 112, 16, 114, 792]}

 47%|████▋     | 469997/1000000 [5:21:00<4:41:24, 31.39it/s]global step 470000, trans_decision ep_re 682.1507544970075

{"global_step": 470000, "eval_re": [1416.124512646793, 5.774620945348246, 
8.757075907087103, 5.669437780416742, 187.62197179384376, 5.557801889164339, 
2038.0085064896837, 333.09943739160406, 2813.164573839306, 7.729606286827807], 
"eval_len": [400, 24, 21, 16, 99, 19, 575, 171, 705, 20]}

 48%|████▊     | 479997/1000000 [5:27:40<4:39:13, 31.04it/s]global step 480000, trans_decision ep_re 532.2389882902335

{"global_step": 480000, "eval_re": [239.12921645299267, 55.12724334643793, 
7.350426048115137, 1562.9340104706241, 3057.7952469486067, 18.1448461231686, 
25.89986095780802, 0.9312955255599349, 348.7446392114278, 6.333097817593214], 
"eval_len": [125, 75, 22, 438, 756, 34, 59, 21, 156, 19]}

 49%|████▉     | 489997/1000000 [5:34:50<4:28:39, 31.64it/s]global step 490000, trans_decision ep_re 633.5968097601548

{"global_step": 490000, "eval_re": [3.654721164968958, 4.872565555096787, 
7.557467888197772, 11.071526323236009, 9.288437093658285, 2128.5196920342773, 
1471.7619626009446, 2686.3091252858535, 5.959890663928194, 6.972708991386622], 
"eval_len": [17, 17, 21, 23, 20, 596, 399, 655, 23, 19]}

 50%|████▉     | 499997/1000000 [5:41:33<4:23:51, 31.58it/s]global step 500000, trans_decision ep_re 67.38298095279455

{"global_step": 500000, "eval_re": [21.61737262094912, 28.88695666370151, 
7.8130657716948715, 8.757146651750867, 58.719729780275706, 81.75026359773173, 
6.176350493199846, 219.26491945995815, 229.89256719235294, 10.951437296330688], 
"eval_len": [34, 37, 18, 18, 81, 76, 18, 155, 170, 21]}

 51%|█████     | 509997/1000000 [5:48:25<4:13:49, 32.17it/s]global step 510000, trans_decision ep_re 64.77586572283145

{"global_step": 510000, "eval_re": [6.308240623261978, 10.060202674505147, 
4.637714722667742, 375.70070272570683, 2.5025568883981077, 7.8790742893353345, 
145.99681398852744, 24.108553172672863, 8.67531470023993, 61.88948344299913], 
"eval_len": [19, 21, 21, 143, 16, 21, 76, 40, 34, 60]}

 52%|█████▏    | 519997/1000000 [5:55:17<4:17:09, 31.11it/s]global step 520000, trans_decision ep_re 24.36517024106526

{"global_step": 520000, "eval_re": [8.990433633351438, 41.48477844693543, 
8.259649052392385, 5.940521941116985, 134.9897360409923, 5.127677705248582, 
5.621986008713995, 8.007053471580555, 18.197740862140076, 7.032125248180857], 
"eval_len": [19, 41, 23, 22, 72, 18, 17, 18, 37, 23]}

 53%|█████▎    | 529997/1000000 [6:02:20<4:09:37, 31.38it/s]global step 530000, trans_decision ep_re 550.31232953227

{"global_step": 530000, "eval_re": [1232.8616131639876, 1693.625583016564, 
133.88891966380743, 184.20895243609098, 5.519057206605701, 32.85566015226933, 
5.961410077663704, 19.079391486768458, 2187.3206641341517, 7.802043984791583], 
"eval_len": [308, 448, 127, 100, 20, 44, 16, 34, 525, 18]}

 54%|█████▍    | 539997/1000000 [6:09:20<4:03:19, 31.51it/s]global step 540000, trans_decision ep_re 635.5212494753779

{"global_step": 540000, "eval_re": [1208.308223362033, 192.5129460047895, 
9.1642886295256, 750.4465953515747, 9.684770638181691, 1651.1911150579613, 
405.856278343517, 1965.4837497361175, 8.835630935094823, 153.72889669498423], 
"eval_len": [336, 119, 20, 249, 20, 461, 197, 492, 20, 105]}

 55%|█████▍    | 549997/1000000 [6:16:01<3:56:01, 31.78it/s]global step 550000, trans_decision ep_re 450.66056800936565

{"global_step": 550000, "eval_re": [105.09330034896327, 6.392720148022752, 
12.666569612031655, 4.157996470416683, 1009.3370712363511, 11.449730324774103, 
5.936614581866333, 1180.3529141135095, 1512.3722608983294, 658.8465023593922], 
"eval_len": [84, 19, 25, 15, 290, 22, 19, 363, 420, 238]}

 56%|█████▌    | 559999/1000000 [6:22:57<3:52:24, 31.55it/s]global step 560000, trans_decision ep_re 289.2923488880053

{"global_step": 560000, "eval_re": [6.612080751691795, 7.862141285979743, 
193.14491645129763, 86.22077225178465, 4.8938141163179045, 3.0105102507745425, 
171.59302777075632, 1709.4619568430464, 650.1962219673635, 59.928047191040456], 
"eval_len": [18, 20, 110, 80, 16, 21, 120, 486, 225, 93]}

 57%|█████▋    | 569997/1000000 [6:29:52<3:51:00, 31.02it/s]global step 570000, trans_decision ep_re 351.15046168702236

{"global_step": 570000, "eval_re": [189.82060508893167, 30.293354701766145, 
81.5216881645983, 932.2192395045676, 2230.5842913366246, 30.561070985647255, 
3.6389003336686967, 3.991418931017156, 1.4685444560106595, 7.40550336739168], 
"eval_len": [105, 37, 71, 287, 542, 37, 18, 17, 12, 18]}

 58%|█████▊    | 579999/1000000 [6:36:45<3:45:00, 31.11it/s]global step 580000, trans_decision ep_re 168.68505804659884

{"global_step": 580000, "eval_re": [27.266206329885414, 395.1212239794848, 
1202.718932093811, 8.468856993301792, 9.442973205382854, 10.364964124805175, 
6.641331351426977, 9.939713289575689, 7.927470132369432, 8.95890896594483], 
"eval_len": [37, 214, 335, 23, 36, 21, 16, 23, 20, 20]}

 59%|█████▉    | 589997/1000000 [6:43:50<3:37:19, 31.44it/s]global step 590000, trans_decision ep_re 344.13671585227087

{"global_step": 590000, "eval_re": [260.7973209246358, 4.95161173888325, 
74.2723898451121, 171.40732411659974, 518.3773052449682, 6.815667700593966, 
5.099677103479306, 204.4430259650838, 587.0646030943893, 1608.138232788963], 
"eval_len": [169, 17, 61, 112, 189, 20, 20, 124, 242, 410]}

 60%|█████▉    | 599997/1000000 [6:50:32<3:32:27, 31.38it/s]global step 600000, trans_decision ep_re 401.91907673643794

{"global_step": 600000, "eval_re": [37.23940349428417, 10.003703639563494, 
923.4245151161548, 1685.5046532668816, 85.06420360230041, 6.088391506158062, 
772.7611522294891, 108.21418306377758, 192.01675571870578, 198.87380572706422], 
"eval_len": [55, 24, 268, 439, 103, 17, 238, 80, 98, 114]}

 61%|██████    | 609997/1000000 [6:57:40<3:26:55, 31.41it/s]global step 610000, trans_decision ep_re 595.1888646151791

{"global_step": 610000, "eval_re": [3.4425055112247596, 2451.1152785795134, 
6.126792684242039, 73.25103038892588, 34.98700161869934, 173.16580345597367, 
73.28831197030755, 2135.557535847178, 3.3695830467560497, 997.5848030489714], 
"eval_len": [17, 638, 20, 70, 42, 128, 131, 542, 23, 389]}

 62%|██████▏   | 619997/1000000 [7:04:24<3:21:06, 31.49it/s]global step 620000, trans_decision ep_re 30.48508053944584

{"global_step": 620000, "eval_re": [47.73347438555024, 24.64530459528752, 
73.99646380972008, 43.34663799257305, 10.718341463245402, 10.261334693271632, 
5.218305619156032, 12.26325606941756, 67.91412103098408, 8.753565735252824], 
"eval_len": [57, 38, 69, 53, 23, 20, 17, 21, 58, 21]}

 63%|██████▎   | 629997/1000000 [7:11:30<3:18:34, 31.05it/s]global step 630000, trans_decision ep_re 597.3950664963797

{"global_step": 630000, "eval_re": [9.03032517237213, -0.8141929161706447, 
-0.06134786565276566, 0.6804330999872239, 8.03067222915639, 1646.5878484945167, 
1268.3842602208044, 367.6081777416024, 2614.788316300993, 59.71617248618947], 
"eval_len": [21, 21, 19, 15, 21, 429, 354, 157, 682, 62]}

 64%|██████▍   | 639997/1000000 [7:18:30<3:17:19, 30.41it/s]global step 640000, trans_decision ep_re 920.7217143112717

{"global_step": 640000, "eval_re": [1751.30869602651, 1095.4669578409048, 
470.4112506443571, 31.383987644439934, 2144.6548109445494, 2836.1184482776507, 
4.452067501419302, 455.5950000355177, 61.885262082458574, 355.9406621149106], 
"eval_len": [429, 321, 199, 36, 545, 679, 14, 354, 65, 137]}

 65%|██████▍   | 649997/1000000 [7:25:11<3:08:22, 30.97it/s]global step 650000, trans_decision ep_re 606.4806581531818

{"global_step": 650000, "eval_re": [1698.7223064974328, 144.42057573260627, 
900.2010804643454, 9.703221847617003, 5.043338973471584, 4.303375985165335, 
2623.027059374566, 400.83541592355925, 7.019252384834236, 271.5309543482212], 
"eval_len": [431, 102, 269, 21, 19, 19, 634, 186, 19, 142]}

 66%|██████▌   | 659997/1000000 [7:32:20<2:57:12, 31.98it/s]global step 660000, trans_decision ep_re 503.6954475829287

{"global_step": 660000, "eval_re": [1.9117730325615059, 5.154577433189141, 
168.6971879376715, 1806.7042356549564, 1855.2996317747184, 239.7164635117808, 
2.3895841079659004, 2.696511651946324, 742.4370110095774, 211.9474997149189], 
"eval_len": [20, 19, 101, 446, 452, 174, 19, 19, 227, 163]}

 67%|██████▋   | 669997/1000000 [7:39:02<2:54:47, 31.47it/s]global step 670000, trans_decision ep_re 254.7897113530693

{"global_step": 670000, "eval_re": [174.0397546660407, 4.959855786288982, 
7.104202804086726, 171.58269022709632, 7.131271208608652, 150.0333533933124, 
958.9587138943964, 141.84033226417873, 928.883913558096, 3.3630257285879797], 
"eval_len": [104, 14, 18, 105, 18, 82, 326, 74, 322, 13]}

 68%|██████▊   | 679999/1000000 [7:46:10<2:48:57, 31.57it/s]global step 680000, trans_decision ep_re 341.1582286405459

{"global_step": 680000, "eval_re": [60.23130899541039, 1.5683434891645416, 
4.563758460405081, 1680.5213047979876, 2.0438877923228094, 307.9230953167911, 
533.6185735346556, 7.910193247926446, 767.9635780695887, 45.2382427012061], 
"eval_len": [77, 14, 18, 474, 19, 166, 213, 23, 240, 43]}

 69%|██████▉   | 689997/1000000 [7:52:51<2:40:53, 32.11it/s]global step 690000, trans_decision ep_re 577.8098122301428

{"global_step": 690000, "eval_re": [6.614599157027736, 2586.81819914016, 
321.1730721968808, 543.1610877874703, 32.98212618055794, 7.108373213323032, 
2128.5866123238916, 0.8387132917915112, 27.226034211484325, 123.58930479884152],
"eval_len": [20, 598, 174, 216, 36, 24, 521, 20, 37, 98]}

 70%|██████▉   | 699999/1000000 [8:00:00<2:36:57, 31.86it/s]global step 700000, trans_decision ep_re 724.5172947840873

{"global_step": 700000, "eval_re": [6.936154308455491, 5.922548017343416, 
7.621641371322856, 4381.091402180005, 533.0256583968292, 48.76135187085753, 
7.4471955969270835, 3.7435326240543025, 2242.477580899668, 8.145882575409377], 
"eval_len": [17, 20, 18, 958, 199, 64, 20, 20, 555, 24]}

 71%|███████   | 709997/1000000 [8:06:44<2:40:21, 30.14it/s]global step 710000, trans_decision ep_re 166.05474499834457

{"global_step": 710000, "eval_re": [716.8327491418491, 10.372939342689063, 
213.50778255207968, 4.670931922771385, 9.764979804185467, 601.1840214229476, 
32.12302124540343, 1.6602591629430843, 8.118668761258071, 62.312096627318965], 
"eval_len": [316, 21, 102, 18, 19, 243, 51, 14, 20, 79]}

 72%|███████▏  | 719999/1000000 [8:13:36<2:25:22, 32.10it/s]global step 720000, trans_decision ep_re 211.09414968492266

{"global_step": 720000, "eval_re": [68.53820426418578, 142.13992094227294, 
79.58394379178185, 194.71937389016344, 153.2086839163895, 271.80358652341255, 
904.4515473427298, 14.478551585417863, 149.2907768686192, 132.72690772425395], 
"eval_len": [77, 97, 63, 120, 80, 136, 273, 40, 98, 96]}

 73%|███████▎  | 729997/1000000 [8:20:40<2:21:18, 31.85it/s]global step 730000, trans_decision ep_re 491.4419055334145

{"global_step": 730000, "eval_re": [2.765274315235024, 1273.8826292888277, 
17.31867953205057, 1522.816439259672, 0.9920633593071023, 461.7154455517938, 
820.8541301861269, 8.895588432091827, 5.168148844677558, 800.0106565643625], 
"eval_len": [18, 337, 34, 571, 14, 211, 300, 20, 17, 284]}

 74%|███████▍  | 739997/1000000 [8:27:40<2:15:29, 31.98it/s]global step 740000, trans_decision ep_re 1091.8186853412656

{"global_step": 740000, "eval_re": [5.135136803088908, 1683.3205017190483, 
29.032517912063234, 1869.7301799022862, 8.342713645615218, 2123.5329844664566, 
3242.2952246170908, 3.322782895499999, 1490.534866192437, 462.9399452590699], 
"eval_len": [16, 427, 38, 535, 22, 529, 808, 13, 424, 191]}

 75%|███████▍  | 749997/1000000 [8:34:40<2:11:44, 31.63it/s]global step 750000, trans_decision ep_re 1538.0122032181357

{"global_step": 750000, "eval_re": [9.254823650560885, 4669.212956805499, 
1451.7194113566331, 9.99137580727128, 4113.6119598864925, 4080.40364529982, 
7.014333174481014, 1024.9012567168259, 6.185482934298135, 7.82678654947893], 
"eval_len": [21, 1000, 445, 22, 935, 926, 18, 333, 22, 18]}

 76%|███████▌  | 759997/1000000 [8:41:27<2:08:04, 31.23it/s]global step 760000, trans_decision ep_re 87.63985373917683

{"global_step": 760000, "eval_re": [9.127263825568065, 6.1597097613932625, 
1.8172403203454264, 90.07171328593331, 95.9785804342359, 195.0138941331806, 
5.856375630538503, 419.2222345734873, 46.45213939166809, 6.699386035417832], 
"eval_len": [20, 16, 19, 81, 80, 156, 19, 164, 50, 21]}

 77%|███████▋  | 769999/1000000 [8:48:30<1:59:56, 31.96it/s]global step 770000, trans_decision ep_re 518.9573399282237

{"global_step": 770000, "eval_re": [834.3432477185779, 9.103588436922099, 
410.1516564580447, 230.42874987382478, 782.7995693720402, 107.35177501108404, 
580.4938601745946, 429.59251796209037, 106.98077913656141, 1698.3276551384963], 
"eval_len": [239, 21, 151, 115, 273, 99, 205, 177, 71, 420]}

 78%|███████▊  | 779997/1000000 [8:55:14<1:55:46, 31.67it/s]global step 780000, trans_decision ep_re 570.3220909738567

{"global_step": 780000, "eval_re": [180.25485819796177, 3.7516781663443868, 
9.110483171731508, 63.73737142426138, 2837.090647090623, 57.270821966610455, 
63.62428874781505, 880.9288670994958, 1566.525242350306, 40.926651523417725], 
"eval_len": [101, 16, 19, 62, 756, 55, 56, 271, 389, 52]}

 79%|███████▉  | 789999/1000000 [9:02:20<1:52:36, 31.08it/s]global step 790000, trans_decision ep_re 647.1306977882998

{"global_step": 790000, "eval_re": [6.404283352211466, 2033.030887081, 
178.7453826320592, 942.2785461021363, 5.486209911301412, 526.6518049028845, 
882.0478215819152, 8.45417775947698, 1361.636102015634, 526.5717625443787], 
"eval_len": [20, 515, 97, 331, 17, 201, 306, 20, 369, 215]}

 80%|███████▉  | 799997/1000000 [9:09:10<1:44:17, 31.96it/s]global step 800000, trans_decision ep_re 414.9376077873527

{"global_step": 800000, "eval_re": [2324.0392476297884, 263.5407662510007, 
3.6165043831344588, 7.091344708131832, 8.523085008036592, 59.630227578291795, 
52.22090287811598, 190.3180029015361, 971.3792843466902, 269.01671218880057], 
"eval_len": [587, 122, 17, 19, 22, 58, 53, 103, 331, 124]}

 81%|████████  | 809997/1000000 [9:15:50<1:39:27, 31.84it/s]global step 810000, trans_decision ep_re 589.9325295153136

{"global_step": 810000, "eval_re": [763.4453320653301, 236.09307526141174, 
133.56728139247645, 133.0750598344282, 161.54721968013368, 3701.604917385421, 
245.25470562392712, 228.31417702135136, 3.931710558072338, 292.49181633058356], 
"eval_len": [229, 118, 83, 87, 123, 862, 138, 106, 16, 167]}

 82%|████████▏ | 819999/1000000 [9:23:00<1:34:19, 31.81it/s]global step 820000, trans_decision ep_re 893.0601536493166

{"global_step": 820000, "eval_re": [9.06964363013935, 780.666292917343, 
1689.0941114472976, 85.92733090064203, 1836.0024057563248, 329.5196440619557, 
2.743926844500307, 821.2739988835998, 2223.1065937381472, 1153.1975883132172], 
"eval_len": [19, 266, 440, 72, 444, 160, 14, 248, 588, 316]}

 83%|████████▎ | 829997/1000000 [9:29:50<1:27:37, 32.33it/s]global step 830000, trans_decision ep_re 549.752298457133

{"global_step": 830000, "eval_re": [429.4191524450827, 218.18973703216395, 
4.6260465415607595, 306.24202374784426, 183.27229996056056, 1179.3159314770273, 
109.94303941114605, 2012.4573257735044, 6.278635295471552, 1047.778792886967], 
"eval_len": [169, 145, 19, 144, 104, 341, 74, 455, 18, 297]}

 84%|████████▍ | 839997/1000000 [9:36:40<1:26:43, 30.75it/s]global step 840000, trans_decision ep_re 452.001646531596

{"global_step": 840000, "eval_re": [7.0179134788930995, 394.72414210186975, 
6.654191904034903, 927.9137399839154, 197.37159448829826, 6.91709073096382, 
199.0842120939753, 1217.2237964392898, 1501.3102669907707, 61.79951710394868], 
"eval_len": [17, 226, 22, 289, 119, 18, 105, 344, 436, 62]}

 85%|████████▍ | 849997/1000000 [9:43:30<1:18:49, 31.72it/s]global step 850000, trans_decision ep_re 651.1012640511833

{"global_step": 850000, "eval_re": [593.4584550979848, 468.2273903980265, 
672.0138621558822, 1404.443412360471, 277.6548430830579, 252.21442553019514, 
241.11305252720183, 8.824944737158981, 406.4692630808886, 2186.592991540966], 
"eval_len": [227, 177, 234, 366, 135, 154, 125, 18, 312, 567]}

 86%|████████▌ | 859997/1000000 [9:50:12<1:12:32, 32.16it/s]global step 860000, trans_decision ep_re 485.10545802373554

{"global_step": 860000, "eval_re": [1759.7751425664458, 51.65354667462392, 
10.2450632442519, 1.145635957794387, 5.622795794530332, 50.28300119822308, 
4.95500233287778, 1355.0793086822985, 905.7391661333035, 706.5559176530066], 
"eval_len": [428, 59, 23, 16, 18, 63, 20, 332, 265, 237]}

 87%|████████▋ | 869999/1000000 [9:57:03<1:07:13, 32.23it/s]global step 870000, trans_decision ep_re 183.9909471667542

{"global_step": 870000, "eval_re": [85.77114862939402, 24.395138591572476, 
112.2184704166969, 592.5317236990966, 69.16672687668263, 735.7412960061016, 
10.125101210257538, 5.41651613767976, 148.31407805047886, 56.229272049581745], 
"eval_len": [74, 50, 93, 195, 80, 229, 21, 20, 106, 60]}

 88%|████████▊ | 879997/1000000 [10:03:53<1:04:00, 31.25it/s]global step 880000, trans_decision ep_re 120.10299059687497

{"global_step": 880000, "eval_re": [6.7364281547276725, 5.009699737372931, 
41.55261897818773, 61.65015441132783, 821.6189798841423, 5.185825791722061, 
158.31922142327062, 84.06548425971035, 11.439373636128913, 5.452119692159462], 
"eval_len": [20, 15, 87, 59, 225, 22, 96, 68, 21, 18]}

 89%|████████▉ | 889999/1000000 [10:10:41<56:07, 32.67it/s]global step 890000, trans_decision ep_re 322.6239985086188

{"global_step": 890000, "eval_re": [707.6314296144576, 7.8091947515255, 
236.40327048272167, 1599.8421246287437, 12.86618097951511, 483.3212816722835, 
11.290452226918442, 8.89535556953302, 73.5309774464764, 84.64971771401302], 
"eval_len": [222, 21, 106, 419, 24, 191, 24, 21, 78, 91]}

 90%|████████▉ | 899997/1000000 [10:17:30<51:35, 32.31it/s]global step 900000, trans_decision ep_re 619.1067999719828

{"global_step": 900000, "eval_re": [60.643372714698074, 37.92215770979844, 
984.5834311720204, 9.798313310915773, 904.0771430598695, 55.39831898627261, 
5.124454743656647, 3956.228010017641, 173.07427776607932, 4.218520238875088], 
"eval_len": [64, 53, 273, 20, 262, 64, 19, 824, 105, 18]}

 91%|█████████ | 909999/1000000 [10:24:21<46:50, 32.02it/s]global step 910000, trans_decision ep_re 350.4875031897831

{"global_step": 910000, "eval_re": [51.4305103864669, 394.1899644594306, 
12.171206095356293, 4.37769891654316, 1.452507962754177, 43.35548533682021, 
685.0922945613225, 1236.4488805316093, 1077.097633333711, -0.7411496861826027], 
"eval_len": [65, 162, 88, 15, 18, 62, 270, 354, 323, 15]}

 92%|█████████▏| 919997/1000000 [10:31:11<41:42, 31.97it/s]global step 920000, trans_decision ep_re 356.9897437499907

{"global_step": 920000, "eval_re": [9.039827407646293, 75.75697324678116, 
2066.004416457969, 12.347747071323806, 583.6084775072967, 147.48916027700045, 
589.5723740596432, 73.00741435705486, 7.364845005118805, 5.706202110072596], 
"eval_len": [17, 85, 481, 24, 218, 78, 224, 67, 19, 15]}

 93%|█████████▎| 929999/1000000 [10:38:01<36:38, 31.84it/s]global step 930000, trans_decision ep_re 586.9034285306224

{"global_step": 930000, "eval_re": [0.2694911003321222, 126.4869786386132, 
757.3314324837143, 2.5837137639495693, 1045.4033590917938, 671.8680907831031, 
1417.7021938140979, 100.91294071352056, 124.33645863503078, 1622.1396262820683],
"eval_len": [13, 90, 244, 17, 277, 226, 437, 83, 95, 403]}

 94%|█████████▍| 939997/1000000 [10:44:53<31:25, 31.83it/s]global step 940000, trans_decision ep_re 430.8737939588329

{"global_step": 940000, "eval_re": [9.506082695116428, 2806.377973417915, 
4.575757891902111, 234.01423420212862, 743.7864353351717, 165.09837922571572, 
6.752204695498094, 127.81445649672361, 207.3911640286321, 3.421251599524779], 
"eval_len": [19, 674, 20, 116, 238, 108, 19, 98, 170, 14]}

 95%|█████████▍| 949999/1000000 [10:51:45<25:36, 32.54it/s]global step 950000, trans_decision ep_re 677.772637840106
 95%|█████████▍| 949999/1000000 [10:52:00<25:36, 32.54it/s]
{"global_step": 950000, "eval_re": [1007.3520368023734, 1358.3399224393443, 
2052.6199244506583, 277.1284349069212, 140.67897308175515, 161.83373831415253, 
1313.8248289519531, 150.7455519955266, 169.41069151759694, 145.79227594077818], 
"eval_len": [320, 351, 442, 139, 88, 99, 397, 94, 96, 104]}

 96%|█████████▌| 959997/1000000 [10:58:50<21:00, 31.72it/s]global step 960000, trans_decision ep_re 1184.4261954024787

{"global_step": 960000, "eval_re": [248.9467333070155, 145.56482010631592, 
3686.2516700640685, 796.6634101695639, 3.286531155897682, 2513.4939446163808, 
176.0502499855436, 2511.3933205606545, 264.6763233190091, 1497.9349507403372], 
"eval_len": [124, 85, 861, 252, 16, 648, 111, 566, 130, 409]}

 97%|█████████▋| 969997/1000000 [11:05:50<15:51, 31.55it/s]global step 970000, trans_decision ep_re 677.5917161703916

{"global_step": 970000, "eval_re": [219.53893125707611, 206.20857127722624, 
139.6599440977825, 6.017067710716361, 52.961608312349036, 178.78178564608717, 
1729.1447861497807, 2531.416055317446, 819.3189255903307, 892.8694863451198], 
"eval_len": [123, 120, 130, 19, 53, 152, 532, 620, 303, 345]}

 98%|█████████▊| 979997/1000000 [11:12:33<10:40, 31.25it/s]global step 980000, trans_decision ep_re 936.2468752424808

{"global_step": 980000, "eval_re": [212.55864104115295, 68.80644858120438, 
4599.872186345721, 171.66015384279686, 6.263275671318262, 3.1073484596598453, 
161.6124653258635, 3.874863022385442, 3774.477264137274, 360.23610599743273], 
"eval_len": [120, 72, 943, 102, 16, 16, 121, 17, 813, 157]}

 99%|█████████▉| 989999/1000000 [11:19:40<05:17, 31.53it/s]global step 990000, trans_decision ep_re 106.36663168237985

{"global_step": 990000, "eval_re": [6.655678671990062, 57.72470721054019, 
243.12261339168222, 2.8074893876254747, 5.079408835614902, 8.04319125888673, 
288.9816078008151, 358.59969091268533, 2.542976556426324, 90.10895279753217], 
"eval_len": [18, 137, 148, 17, 17, 18, 180, 166, 19, 77]}

100%|█████████▉| 999997/1000000 [11:26:40<00:00, 31.53it/s]global step 1000000, trans_decision ep_re 1035.736429466563

{"global_step": 1000000, "eval_re": [379.2311953685255, 483.6603531066401, 
68.57181568705937, 66.63586505911462, 2508.4383143378654, 2139.6119263364235, 
408.722263376601, 1.07281627628321, 1955.5930967220102, 2345.8266483951084], 
"eval_len": [144, 295, 68, 68, 549, 505, 162, 17, 486, 531]}

100%|██████████| 1000000/1000000 [11:26:41<00:00, 24.27it/s]
