
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 4,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)'
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9999/1000000 [02:30<5:39:22, 48.62it/s]global step 10000, trans_decision ep_re 424.55388241244543

{"global_step": 10000, "eval_re": [380.4467989084864, 184.05267380439312, 
142.36389689869466, 166.70318582043322, 444.75628008015747, 320.3800514167792, 
1118.9089165724774, 221.04093121174554, 245.9843033100764, 1020.9017861012113], 
"eval_len": [215, 239, 121, 202, 292, 191, 1000, 119, 144, 1000]}

  2%|▏         | 19995/1000000 [07:00<5:37:26, 48.40it/s]global step 20000, trans_decision ep_re 334.7852997107176

{"global_step": 20000, "eval_re": [343.6406442685316, 354.9197000473459, 
393.7575829335018, 69.77965722278576, 543.9372751274783, 379.46777618737383, 
343.5358646776741, 111.41096761358082, 354.8941384280772, 452.5093906008261], 
"eval_len": [453, 174, 184, 181, 263, 199, 471, 158, 466, 227]}

  3%|▎         | 29997/1000000 [11:50<5:32:51, 48.57it/s]global step 30000, trans_decision ep_re 349.29983923167396

{"global_step": 30000, "eval_re": [195.92606836812428, 128.62857018875889, 
468.31196863310714, 536.3673084043553, 498.7603891836688, 97.23969453287462, 
403.70569305127026, 393.2530438242284, 357.0355933975, 413.7700627328515], 
"eval_len": [310, 210, 291, 406, 371, 175, 231, 227, 208, 235]}

  4%|▍         | 39995/1000000 [16:15<5:27:27, 48.86it/s]global step 40000, trans_decision ep_re 263.42750680754574

{"global_step": 40000, "eval_re": [173.5304633753861, 40.90506334808219, 
232.49012141285758, 336.9045961897364, 35.09486108448424, 367.5576115301749, 
357.92215558030574, 255.87044805545557, 500.220491489264, 333.7792560097108], 
"eval_len": [326, 49, 138, 185, 60, 217, 183, 171, 290, 191]}

  5%|▍         | 49997/1000000 [21:10<5:23:32, 48.94it/s]global step 50000, trans_decision ep_re 814.3456571996928

{"global_step": 50000, "eval_re": [938.7432682432261, 48.67941681095154, 
1473.947605340756, 1611.8162892266623, 1081.0078191689624, 692.4425346329167, 
1008.3102482421137, 407.564872320488, 854.5744361752252, 26.37008183562708], 
"eval_len": [511, 120, 1000, 678, 654, 369, 1000, 220, 368, 38]}

  6%|▌         | 59997/1000000 [25:50<5:19:58, 48.96it/s]global step 60000, trans_decision ep_re 691.544283080241

{"global_step": 60000, "eval_re": [172.61083730924398, 873.2083552583574, 
1099.1281138413694, 159.49729983568093, 572.0731056074084, 571.9671322051859, 
1282.4106063563825, 1161.6142049551875, 808.8305281124718, 214.10264732112182], 
"eval_len": [172, 370, 556, 216, 337, 292, 1000, 597, 422, 254]}

  7%|▋         | 69997/1000000 [30:16<5:19:22, 48.53it/s]global step 70000, trans_decision ep_re 486.3458876623172

{"global_step": 70000, "eval_re": [738.6568585726573, 540.4583766568621, 
136.2131507555123, 288.7532722728848, 435.4922116902774, 84.97891946712566, 
415.531930565674, 1037.3461738353462, 716.2732868931765, 469.75469591365663], 
"eval_len": [327, 287, 141, 176, 219, 178, 195, 432, 349, 239]}

  8%|▊         | 79997/1000000 [35:10<5:14:29, 48.76it/s]global step 80000, trans_decision ep_re 894.9859160034624

{"global_step": 80000, "eval_re": [1666.7014534339594, 1269.474275290089, 
897.3895640894957, 262.7898650757503, 999.9888082749762, 519.8332062551575, 
429.3441749654423, 201.64308067009665, 2122.0436486112408, 580.6510833684151], 
"eval_len": [710, 516, 416, 143, 446, 193, 203, 85, 768, 289]}

  9%|▉         | 89999/1000000 [39:33<5:11:22, 48.71it/s]global step 90000, trans_decision ep_re 776.4592833709833

{"global_step": 90000, "eval_re": [2.6650584589152118, 648.3832828456552, 
652.0012244099521, 1534.9021233731314, 398.19788661902516, 540.3763878758244, 
1917.081507077872, 777.663751004326, 400.94816269637255, 892.3734493487583], 
"eval_len": [14, 253, 239, 472, 179, 216, 613, 302, 196, 315]}

 10%|▉         | 99999/1000000 [44:30<5:09:09, 48.52it/s]global step 100000, trans_decision ep_re 1678.2456746451821

{"global_step": 100000, "eval_re": [2182.962991659031, 4.183901332775354, 
3448.1272134111337, 826.9777505606197, 2216.5147391137098, 570.7753008516381, 
885.6746858767606, 1508.8365211923117, 3038.640196797832, 2099.763445656011], 
"eval_len": [751, 17, 983, 283, 706, 250, 373, 558, 1000, 637]}

 11%|█         | 109997/1000000 [49:10<5:03:35, 48.86it/s]global step 110000, trans_decision ep_re 1347.1050924693773

{"global_step": 110000, "eval_re": [2856.1355685226245, 191.3235495005765, 
3591.4819365586104, 643.2992378273934, 445.27669369253994, 165.25209709957994, 
747.2388524332761, 1094.641863522995, 2172.522890464346, 1563.8782350718309], 
"eval_len": [887, 111, 1000, 256, 179, 79, 276, 439, 657, 524]}

 12%|█▏        | 119998/1000000 [53:50<4:57:28, 49.30it/s]global step 120000, trans_decision ep_re 2025.8791786696722

{"global_step": 120000, "eval_re": [2008.4192131940886, 1085.6104994829798, 
3223.1244126846123, 2605.766774466881, 1563.501159612977, 3433.3966353769974, 
905.718362587926, 2191.2540337985374, 2955.5901110543464, 286.4105844373772], 
"eval_len": [600, 350, 1000, 782, 460, 1000, 343, 747, 837, 159]}

 13%|█▎        | 129997/1000000 [58:21<4:54:43, 49.20it/s]global step 130000, trans_decision ep_re 1102.5696740357162

{"global_step": 130000, "eval_re": [1986.5559780519181, 159.52824957737585, 
2145.9417269962996, 1664.6190111681908, 138.23215802883556, 1529.5985045706607, 
141.79202932794985, 107.44360667476442, 138.76689852838936, 3013.218577432777], 
"eval_len": [560, 90, 578, 470, 75, 464, 71, 69, 81, 813]}

 14%|█▍        | 139999/1000000 [1:03:10<4:46:31, 50.02it/s]global step 140000, trans_decision ep_re 994.1841526818198

{"global_step": 140000, "eval_re": [916.4379031363862, 377.86545912625104, 
1802.4775578215979, 667.2733023565708, 3191.6843689791626, 937.5468015726121, 
703.7389515150816, 705.4043240179338, 29.4179973471948, 609.9948609454071], 
"eval_len": [291, 213, 496, 235, 898, 280, 227, 227, 29, 231]}

 15%|█▍        | 149995/1000000 [1:07:40<4:43:14, 50.02it/s]global step 150000, trans_decision ep_re 635.9605719073514

{"global_step": 150000, "eval_re": [2.2785760086090603, 66.57868281193446, 
61.160840648040285, 135.26271846837307, 131.49770932833087, 3584.864085396089, 
403.17968821969333, 1710.0519710033886, 137.2252792830229, 127.50616790603307], 
"eval_len": [17, 73, 54, 106, 84, 1000, 203, 535, 82, 87]}

 16%|█▌        | 159999/1000000 [1:12:01<4:42:56, 49.48it/s]global step 160000, trans_decision ep_re 1244.3229969601368

{"global_step": 160000, "eval_re": [122.30960340349816, 801.3552439504925, 
2290.609769397036, 504.3114220717367, 1987.6190225934133, 2572.901263014655, 
1792.3141495975397, 789.8711754012811, 1582.1765019967538, 
-0.23818182503714935], "eval_len": [81, 263, 570, 197, 492, 644, 470, 280, 405, 
17]}

 17%|█▋        | 169999/1000000 [1:16:50<4:38:17, 49.71it/s]global step 170000, trans_decision ep_re 2936.955801966519

{"global_step": 170000, "eval_re": [1947.289974988518, 1855.3671617017117, 
556.1539870825044, 3749.601252517356, 3675.3451457121237, 3937.086326592946, 
4022.799398377783, 3487.904735091111, 3655.683881911343, 2482.326155689792], 
"eval_len": [537, 532, 225, 1000, 1000, 1000, 1000, 865, 957, 656]}

 18%|█▊        | 179999/1000000 [1:21:40<4:37:51, 49.19it/s]global step 180000, trans_decision ep_re 2867.281742384379

{"global_step": 180000, "eval_re": [828.9573607656963, 2677.6009013123635, 
2208.078623841787, 2336.938163406698, 4144.604883968897, 2468.324829662309, 
4227.087659863997, 3085.0018375924387, 2417.915071088282, 4278.308092341318], 
"eval_len": [261, 628, 606, 627, 1000, 622, 1000, 735, 650, 979]}

 19%|█▉        | 189997/1000000 [1:26:10<4:30:51, 49.84it/s]global step 190000, trans_decision ep_re 1642.637420298524

{"global_step": 190000, "eval_re": [4098.715864001504, 2157.8876607147827, 
3097.650817638499, 1515.5019293024654, 148.3264521077757, 188.91383118756715, 
156.39109596290572, 1550.2763479804892, 1899.363366512379, 1613.3468375768718], 
"eval_len": [1000, 588, 775, 397, 102, 120, 94, 410, 492, 466]}

 20%|█▉        | 199997/1000000 [1:31:00<4:30:09, 49.35it/s]global step 200000, trans_decision ep_re 1848.468884007151

{"global_step": 200000, "eval_re": [705.6727820967932, 3968.9327145535917, 
186.8041280271736, 2664.212126722641, 248.98707970093471, 2707.9194134809945, 
1447.3319046613751, 4364.620861570769, 43.06901640681032, 2147.1388128504263], 
"eval_len": [251, 1000, 118, 630, 132, 711, 427, 1000, 46, 558]}

 21%|██        | 209999/1000000 [1:35:40<4:25:22, 49.62it/s]global step 210000, trans_decision ep_re 3182.5032410202903

{"global_step": 210000, "eval_re": [4259.570573145138, 4034.4478574619193, 
4107.524507886315, 66.20433053170343, 4421.245066314164, 3151.084688305438, 
3247.2667134594853, 1968.0926386280698, 4034.014772415058, 2535.5812620556094], 
"eval_len": [1000, 1000, 1000, 64, 1000, 773, 843, 540, 1000, 641]}

 22%|██▏       | 219995/1000000 [1:40:30<4:22:05, 49.60it/s]global step 220000, trans_decision ep_re 2058.8884621866614

{"global_step": 220000, "eval_re": [1344.932459604197, 4203.131524721726, 
1499.155309769867, 3398.884340838253, 4045.12500586855, 1698.8178973103647, 
198.40238645611979, 1718.7425157527466, 2368.267914594828, 113.42526694996371], 
"eval_len": [423, 1000, 410, 811, 1000, 465, 98, 446, 616, 73]}

 23%|██▎       | 229997/1000000 [1:45:10<4:20:39, 49.23it/s]global step 230000, trans_decision ep_re 1387.1621801383767

{"global_step": 230000, "eval_re": [2013.4841047495045, 2306.65005261301, 
116.63355630555674, 670.9208902013035, 953.0133383228435, 1407.8156039164533, 
785.0028734739316, 1077.9607660557692, 2835.578639205314, 1704.561976540083], 
"eval_len": [566, 580, 58, 252, 333, 398, 240, 362, 645, 433]}

 24%|██▍       | 239995/1000000 [1:49:50<4:16:41, 49.34it/s]global step 240000, trans_decision ep_re 1520.494978552781

{"global_step": 240000, "eval_re": [3343.6111054915455, 4205.762806084183, 
403.447755311485, 1093.9617609461097, 364.1375717545768, 816.8354338587501, 
1626.1614547692677, 3164.4165704530114, 79.41722063309408, 107.1981062257886], 
"eval_len": [839, 1000, 172, 319, 160, 261, 467, 744, 60, 87]}

 25%|██▍       | 249995/1000000 [1:54:30<4:13:02, 49.40it/s]global step 250000, trans_decision ep_re 2759.3617905403585

{"global_step": 250000, "eval_re": [148.5660887376078, 4107.467542725751, 
3974.4442201135516, 4161.685706604748, 161.18412146739163, 4073.6261259789026, 
4167.260441307647, 4031.330496612115, 2323.547140589526, 444.50602126634334], 
"eval_len": [81, 1000, 1000, 1000, 94, 1000, 1000, 1000, 659, 182]}

 26%|██▌       | 259997/1000000 [1:59:10<4:12:38, 48.82it/s]global step 260000, trans_decision ep_re 1755.302589568712

{"global_step": 260000, "eval_re": [1021.8046318274338, 4045.7208210163767, 
476.90652155937045, 1819.2158016163078, 31.331274934184293, 131.01615439885273, 
3045.860707803911, 4015.386661862041, 1070.9632843909608, 1894.8200362776815], 
"eval_len": [313, 1000, 216, 507, 29, 88, 796, 1000, 352, 519]}

 27%|██▋       | 269995/1000000 [2:03:50<4:07:29, 49.16it/s]global step 270000, trans_decision ep_re 1811.3331134812677

{"global_step": 270000, "eval_re": [1097.7675251559413, 3816.996177415244, 
150.52116116187324, 109.60543787310395, 4151.537588705794, 3920.6018614262666, 
2174.0066682327324, 269.78082107264095, 2376.5475266377325, 45.966367131345066],
"eval_len": [347, 1000, 94, 275, 980, 977, 582, 126, 611, 45]}

 28%|██▊       | 279998/1000000 [2:08:30<4:02:43, 49.44it/s]global step 280000, trans_decision ep_re 2367.789619795693

{"global_step": 280000, "eval_re": [4173.936151620732, 747.0269163724569, 
4078.2252715308473, 3265.7503466919547, 1192.0521126544302, 3965.4966521071606, 
221.31906599367022, 4380.981166483215, 840.7799886122037, 812.3285258902571], 
"eval_len": [1000, 299, 1000, 745, 327, 1000, 122, 1000, 313, 256]}

 29%|██▉       | 289997/1000000 [2:13:10<3:58:06, 49.70it/s]global step 290000, trans_decision ep_re 1293.6058433349488

{"global_step": 290000, "eval_re": [3712.30832016937, 163.4199804409025, 
147.3193904189493, 120.54543349742931, 2680.303312163392, 2.2587428772589218, 
2126.455631242004, 3739.13746506649, 123.74238761372361, 120.56776985996879], 
"eval_len": [1000, 88, 82, 62, 736, 17, 553, 917, 68, 65]}

 30%|██▉       | 299995/1000000 [2:17:35<3:55:57, 49.45it/s]global step 300000, trans_decision ep_re 1140.4443836570495

{"global_step": 300000, "eval_re": [860.7581905583338, 1363.4643869221045, 
130.23577898838627, 194.95052647378088, 3938.8479109145005, 95.98107856541083, 
2317.517665941507, 1970.6425979722487, 187.8400056151238, 344.2056946190995], 
"eval_len": [291, 379, 72, 112, 905, 73, 566, 486, 118, 172]}

 31%|███       | 309997/1000000 [2:22:10<3:52:31, 49.46it/s]global step 310000, trans_decision ep_re 1851.9989582094943

{"global_step": 310000, "eval_re": [2593.3095346366235, 3058.3619961297672, 
4136.693942661436, 1112.5184474814278, 88.30185347343695, 2395.0498259712635, 
1582.5768447012665, 513.0336284441864, 377.988202018584, 2662.1553065769526], 
"eval_len": [602, 725, 930, 325, 66, 633, 440, 207, 152, 663]}

 32%|███▏      | 319997/1000000 [2:27:00<3:49:28, 49.39it/s]global step 320000, trans_decision ep_re 2106.2602794525837

{"global_step": 320000, "eval_re": [4272.607896256048, 2610.3049266675785, 
33.03266375070456, 180.91270554372957, 1953.5525894575312, 3668.872370490475, 
4372.000703064692, 224.7751779332638, 605.8301888276656, 3140.7135725341464], 
"eval_len": [1000, 681, 30, 107, 475, 880, 1000, 170, 212, 701]}

 33%|███▎      | 329999/1000000 [2:31:30<3:47:13, 49.14it/s]global step 330000, trans_decision ep_re 1451.3422737880403

{"global_step": 330000, "eval_re": [336.05512649111364, 586.567750478858, 
1789.8353568519437, 3521.3993738638287, 1446.2044102974423, 231.0599906008773, 
2915.7700657367286, 2472.5607174690995, 0.3909747507745278, 1213.578971339736], 
"eval_len": [165, 217, 460, 811, 410, 135, 706, 603, 15, 360]}

 34%|███▍      | 339999/1000000 [2:36:21<3:42:18, 49.48it/s]global step 340000, trans_decision ep_re 2786.261518668857

{"global_step": 340000, "eval_re": [4038.462707299833, 4068.1156729422064, 
232.20575322937088, 4035.3606207794655, 220.51079727462195, 3023.9792533483546, 
889.5580749502932, 4036.8426702008196, 3040.036751898445, 4277.542884765158], 
"eval_len": [1000, 1000, 147, 1000, 139, 716, 306, 1000, 758, 1000]}

 35%|███▍      | 349996/1000000 [2:41:11<3:38:12, 49.65it/s]global step 350000, trans_decision ep_re 2623.3988067472915

{"global_step": 350000, "eval_re": [1157.1662527282467, 2365.3160377862223, 
4514.42091899444, 2564.899886781661, 1171.1041628531675, 2910.2522954745405, 
137.3726390996031, 4642.402235626197, 4505.25284516919, 2265.800792959642], 
"eval_len": [331, 559, 1000, 605, 339, 678, 85, 1000, 1000, 525]}

 36%|███▌      | 359998/1000000 [2:45:51<3:35:28, 49.50it/s]global step 360000, trans_decision ep_re 1691.485106223236

{"global_step": 360000, "eval_re": [3878.4218756972773, 1560.2571141210938, 
43.48639924398595, 205.28067825691443, 4067.153650084407, 123.97191054481692, 
301.59501487040023, 980.5485924995096, 4010.184228695855, 1743.9515982180997], 
"eval_len": [1000, 439, 104, 122, 1000, 75, 146, 335, 1000, 480]}

 37%|███▋      | 369997/1000000 [2:50:21<3:33:09, 49.26it/s]global step 370000, trans_decision ep_re 1864.3415337938386

{"global_step": 370000, "eval_re": [736.263739133708, 2429.8569243529546, 
3164.051526754078, 2286.3206588190556, 2175.7466162438536, 428.2143798768234, 
2786.0833702017294, 4352.113598077688, 285.74603023722716, -0.9815057587316098],
"eval_len": [252, 587, 785, 570, 527, 175, 624, 1000, 140, 13]}

 38%|███▊      | 379999/1000000 [2:55:11<3:28:53, 49.47it/s]global step 380000, trans_decision ep_re 2032.3619156881225

{"global_step": 380000, "eval_re": [537.02627920494, 293.24386146190653, 
2347.3373497197595, 2274.7654700966646, 4165.263664411153, 4146.871800371591, 
397.6539615892005, 1565.67207575611, 4237.041015674442, 358.743678595459], 
"eval_len": [210, 134, 584, 564, 1000, 1000, 154, 438, 1000, 161]}

 39%|███▉      | 389997/1000000 [3:00:01<3:23:54, 49.86it/s]global step 390000, trans_decision ep_re 2947.2890979483527

{"global_step": 390000, "eval_re": [2122.6844556957594, 1684.612532517853, 
4213.091706806478, 4308.04905819983, 295.9963719936802, 3204.3786563782473, 
4524.907908642174, 4420.891520822346, 3133.962804174216, 1564.3159642529415], 
"eval_len": [566, 468, 1000, 1000, 175, 768, 1000, 1000, 744, 408]}

 40%|███▉      | 399995/1000000 [3:04:41<3:21:11, 49.71it/s]global step 400000, trans_decision ep_re 2411.992995395086

{"global_step": 400000, "eval_re": [1093.3030851796516, 905.5278776054305, 
1713.0380747083714, 2717.253030093171, 2614.764155322703, 4284.418858928048, 
-1.645086901390021, 4423.1046811204, 2140.529671699039, 4229.635606195435], 
"eval_len": [315, 290, 449, 639, 638, 1000, 18, 1000, 494, 1000]}

 41%|████      | 409997/1000000 [3:09:31<3:18:45, 49.47it/s]global step 410000, trans_decision ep_re 1979.678806849845

{"global_step": 410000, "eval_re": [816.0313972031528, 208.52511908829416, 
1322.3961144613534, 1485.6748297369472, 87.70603508144507, 4062.6359559962098, 
4382.823816057598, 2912.376961904471, 170.81559305551843, 4347.802245913457], 
"eval_len": [260, 110, 360, 404, 83, 1000, 1000, 740, 95, 1000]}

 42%|████▏     | 419999/1000000 [3:14:11<3:16:09, 49.28it/s]global step 420000, trans_decision ep_re 3554.7716486459767

{"global_step": 420000, "eval_re": [4356.490927669891, 2064.772812540684, 
4461.549432181547, 2098.235066388235, 4311.192766991738, 4410.355828204828, 
840.3164554987401, 4218.357138614046, 4474.389808406636, 4312.056249963422], 
"eval_len": [1000, 554, 1000, 522, 1000, 1000, 265, 964, 1000, 951]}

 43%|████▎     | 429999/1000000 [3:19:01<3:11:52, 49.51it/s]global step 430000, trans_decision ep_re 2546.868912948059

{"global_step": 430000, "eval_re": [2729.479887821101, 2969.79874239494, 
672.2413198839075, 4323.42739980516, 4674.180503102762, 4572.4524361870435, 
2841.1422251427985, 666.0403308532964, 1723.9075828845225, 296.01870140505895], 
"eval_len": [694, 698, 227, 1000, 1000, 1000, 667, 236, 438, 125]}

 44%|████▍     | 439999/1000000 [3:23:41<3:10:32, 48.98it/s]global step 440000, trans_decision ep_re 1562.6029331455534

{"global_step": 440000, "eval_re": [4435.1992580750775, 208.19978428641372, 
86.79078683512738, 21.649815646563326, 1926.6794337375802, 1255.9974825666563, 
203.6882947362437, 2671.9015425180423, 4710.825091373346, 105.09784168048074], 
"eval_len": [1000, 107, 104, 32, 471, 348, 125, 720, 1000, 79]}

 45%|████▍     | 449995/1000000 [3:28:21<3:05:38, 49.38it/s]global step 450000, trans_decision ep_re 1862.0460107395952

{"global_step": 450000, "eval_re": [3756.2010109810753, 1760.0039502559048, 
1984.1915666341217, 1815.9784004450307, 374.01432817238765, 1525.2825038054202, 
4655.426643726092, 2739.2470950868706, 8.818304406304343, 1.2963038827464648], 
"eval_len": [1000, 479, 495, 467, 278, 410, 1000, 613, 59, 45]}

 46%|████▌     | 459998/1000000 [3:33:01<5:22:34, 27.90it/s]global step 460000, trans_decision ep_re 2114.742604819397

{"global_step": 460000, "eval_re": [1139.5422884975299, 194.23146832493748, 
3031.985512382244, 190.4376641770314, 4471.594178546653, 138.84132399956871, 
3806.6661705758634, 590.0609493389919, 4052.6705459638943, 3531.395946387256], 
"eval_len": [323, 107, 733, 140, 1000, 97, 825, 187, 969, 821]}

 47%|████▋     | 469995/1000000 [3:37:41<4:14:25, 34.72it/s]global step 470000, trans_decision ep_re 2989.718174306589

{"global_step": 470000, "eval_re": [1878.37741088794, 172.56138572421696, 
4683.9242489121725, 4603.865237351398, 4425.304223542071, 3329.2987375544044, 
533.0236399495285, 1136.8446926638671, 4637.013612369147, 4496.968554111144], 
"eval_len": [471, 102, 1000, 1000, 1000, 749, 223, 317, 1000, 1000]}

 48%|████▊     | 479997/1000000 [3:42:31<2:55:17, 49.44it/s]global step 480000, trans_decision ep_re 1884.0790256978787

{"global_step": 480000, "eval_re": [1560.121794220182, 29.236598671926533, 
4652.726426076985, 387.98720035681487, 95.00900989761321, 4225.030955629242, 
4052.5834324714792, 90.30447870197486, 2316.470868601513, 1431.3194923510573], 
"eval_len": [461, 40, 1000, 162, 129, 1000, 1000, 87, 552, 374]}

 49%|████▉     | 489995/1000000 [3:47:11<2:52:49, 49.18it/s]global step 490000, trans_decision ep_re 1663.7789384813484

{"global_step": 490000, "eval_re": [589.5549057201898, 208.81504390427364, 
511.14174256205115, 3181.4674278864964, 2022.2360124451245, 225.9010915230675, 
3855.7150750714754, 3654.5228109604486, 2105.607584253685, 282.82769048667103], 
"eval_len": [224, 111, 193, 729, 533, 122, 824, 862, 480, 142]}

 50%|████▉     | 499995/1000000 [3:51:51<4:38:14, 29.95it/s]global step 500000, trans_decision ep_re 2110.7821768061285

{"global_step": 500000, "eval_re": [4585.840490547557, 1838.8351156930585, 
1649.8506107305802, 2032.6233811167308, 2394.5747230428, 3516.873743012882, 
398.6974182226563, 199.8981397226737, 113.57384167089603, 4377.0543043014495], 
"eval_len": [1000, 508, 436, 513, 601, 831, 159, 107, 102, 1000]}

 51%|█████     | 509997/1000000 [3:56:31<3:11:23, 42.67it/s]global step 510000, trans_decision ep_re 1935.6022446571048

{"global_step": 510000, "eval_re": [209.59644887912415, 81.93370470899369, 
1690.741271478538, 3599.0284084475797, 4681.775939384589, 2565.161700716428, 
72.66630284789021, 103.3342088368202, 4664.575776188729, 1687.2086850823555], 
"eval_len": [98, 67, 485, 779, 1000, 624, 58, 65, 1000, 403]}

 52%|█████▏    | 519997/1000000 [4:01:11<2:42:52, 49.12it/s]global step 520000, trans_decision ep_re 2180.7545240293357

{"global_step": 520000, "eval_re": [3694.9473566500474, 873.5060580460008, 
1460.99249938198, 1948.5944291627086, 3734.1439271261215, 145.28542729834072, 
4707.234258879037, 4730.402908482994, 59.209110088783476, 453.2292651773405], 
"eval_len": [818, 269, 360, 475, 800, 91, 1000, 1000, 48, 162]}

 53%|█████▎    | 529999/1000000 [4:05:36<2:38:15, 49.50it/s]global step 530000, trans_decision ep_re 952.5904353124331

{"global_step": 530000, "eval_re": [-4.659596813365104, 37.60504815220937, 
173.40150143157283, 178.7504054046568, 1134.4976880545885, 37.49786901821404, 
4553.304114496356, 140.53009745026887, 186.7552439071002, 3088.2219820227288], 
"eval_len": [22, 50, 119, 111, 325, 40, 1000, 95, 110, 717]}

 54%|█████▍    | 539995/1000000 [4:10:21<2:34:29, 49.62it/s]global step 540000, trans_decision ep_re 777.9128442376134

{"global_step": 540000, "eval_re": [53.16639355361615, 2255.9728875796864, 
1229.6759662418212, 55.752881860767445, 180.52299548693222, 62.950589547617305, 
67.55050836183572, 1653.417599517176, 2160.3544690322897, 59.76415119439233], 
"eval_len": [58, 540, 331, 57, 99, 64, 65, 410, 510, 59]}

 55%|█████▍    | 549997/1000000 [4:14:42<2:44:01, 45.72it/s]global step 550000, trans_decision ep_re 610.4543197894711

{"global_step": 550000, "eval_re": [122.14238867774345, 761.3793275560329, 
95.70568998042495, 106.26790746761215, 74.36693451802901, 41.57814040962235, 
30.89535447365502, 104.6606264166713, 98.97045546079806, 4668.576372934122], 
"eval_len": [78, 291, 78, 74, 116, 56, 40, 66, 77, 1000]}

 56%|█████▌    | 559999/1000000 [4:19:15<2:29:06, 49.18it/s]global step 560000, trans_decision ep_re 1329.414247156951

{"global_step": 560000, "eval_re": [2549.36360539989, 1489.3379586494789, 
741.2688121728038, 212.67098478408957, 2815.4394121666805, 392.11912965591614, 
3862.955993297004, 269.864068839582, 207.4412209142036, 753.6812856898626], 
"eval_len": [578, 390, 234, 103, 636, 180, 869, 119, 111, 280]}

 57%|█████▋    | 569997/1000000 [4:24:11<2:26:12, 49.02it/s]global step 570000, trans_decision ep_re 2145.712642889273

{"global_step": 570000, "eval_re": [3440.406944251773, 490.52773003769715, 
3448.4074297840643, 2355.883481095363, 4153.717281142509, 2780.0961945821764, 
266.62571554432344, 3705.690546599701, 175.5091657800716, 640.2619400750523], 
"eval_len": [734, 178, 782, 554, 893, 599, 128, 782, 110, 222]}

 58%|█████▊    | 579997/1000000 [4:28:51<2:21:04, 49.62it/s]global step 580000, trans_decision ep_re 2849.1750529618744

{"global_step": 580000, "eval_re": [2598.3975740483397, 3731.19050718808, 
1738.1468335792738, 17.149017929219216, 3484.9099190269944, 2958.8558376885594, 
4738.109866088969, 37.336141317475594, 4613.218065257311, 4574.436767494519], 
"eval_len": [577, 807, 444, 40, 790, 712, 1000, 39, 1000, 1000]}

 59%|█████▉    | 589996/1000000 [4:33:41<2:18:17, 49.41it/s]global step 590000, trans_decision ep_re 2639.0438183289043

{"global_step": 590000, "eval_re": [4776.027062457442, 259.40490042470316, 
1826.6358456782737, 4723.853075585947, 325.9768438758655, 4308.646726476787, 
2661.174020314625, 2308.4756637135247, 2199.1188618058595, 3001.1251829560174], 
"eval_len": [1000, 148, 438, 1000, 135, 1000, 629, 581, 491, 668]}

 60%|█████▉    | 599995/1000000 [4:38:21<2:15:23, 49.24it/s]global step 600000, trans_decision ep_re 1398.615764787361

{"global_step": 600000, "eval_re": [86.08559009239633, 106.47535652171706, 
4334.963828654998, 4111.597309351276, 123.30389777966703, 121.66204086104185, 
4105.331724504658, 121.51631724970319, 734.9133771811802, 140.308205676974], 
"eval_len": [62, 57, 1000, 1000, 60, 68, 1000, 63, 257, 72]}

 61%|██████    | 609997/1000000 [4:42:45<2:11:13, 49.53it/s]global step 610000, trans_decision ep_re 765.890277012083

{"global_step": 610000, "eval_re": [175.7707025879303, 179.43048222702637, 
1244.5594945546698, 465.27434617339594, 143.77244569928772, 4597.811294196011, 
293.8811535322088, 387.2592449466044, 39.39151039237727, 131.7520958113189], 
"eval_len": [91, 94, 372, 171, 69, 1000, 117, 146, 42, 65]}

 62%|██████▏   | 619999/1000000 [4:47:41<2:08:33, 49.26it/s]global step 620000, trans_decision ep_re 3008.9967130923196

{"global_step": 620000, "eval_re": [4926.897688085737, 259.97393758835665, 
84.00929578468346, 2792.591853943853, 4293.3008969133925, 1690.7239425051143, 
3941.9518632217205, 2829.7368778561477, 4574.574646519797, 4696.206128504393], 
"eval_len": [1000, 115, 134, 615, 1000, 387, 851, 614, 1000, 1000]}

 63%|██████▎   | 629997/1000000 [4:52:21<2:04:54, 49.37it/s]global step 630000, trans_decision ep_re 1456.8917568111492

{"global_step": 630000, "eval_re": [209.29278410177886, 1203.2957076118362, 
4668.496077663442, 2581.9582641320826, 4503.895388627556, 221.69924532875308, 
140.2141107562404, 384.94140525072925, 304.17837628272946, 350.9462083563448], 
"eval_len": [102, 305, 1000, 574, 1000, 110, 70, 149, 128, 141]}

 64%|██████▍   | 639996/1000000 [4:57:01<2:37:50, 38.01it/s]global step 640000, trans_decision ep_re 1826.8316246005263

{"global_step": 640000, "eval_re": [1538.7886313133536, 2861.8218879895335, 
213.04432890842182, -3.6419959501824146, 3375.350844099765, 1544.543468609489, 
1297.207309410186, 2928.6942724517226, 1962.1494920763091, 2550.3580070966655], 
"eval_len": [392, 633, 107, 20, 753, 381, 337, 738, 466, 547]}

 65%|██████▍   | 649997/1000000 [5:01:29<1:58:02, 49.42it/s]global step 650000, trans_decision ep_re 328.484081641559

{"global_step": 650000, "eval_re": [130.3377066249216, 129.48423993562807, 
2084.8537935490085, 144.20435295471563, 129.7145845583325, 116.5180832732082, 
155.15169728152645, 129.01325158254008, 135.22745460149918, 130.33565205420965],
"eval_len": [64, 63, 531, 65, 64, 63, 100, 67, 65, 65]}

 66%|██████▌   | 659999/1000000 [5:06:11<1:53:58, 49.72it/s]global step 660000, trans_decision ep_re 1757.5545287026766

{"global_step": 660000, "eval_re": [190.3922477177524, 1756.8500742586168, 
2020.9358789583407, 244.42258020487387, 4736.686220606681, 4470.2405097941755, 
34.64941085252265, 255.4250408846293, 3736.7286728298845, 129.21465091928567], 
"eval_len": [90, 423, 486, 129, 992, 939, 43, 122, 783, 83]}

 67%|██████▋   | 669997/1000000 [5:10:51<1:51:08, 49.49it/s]global step 670000, trans_decision ep_re 1947.5783212380313

{"global_step": 670000, "eval_re": [211.72578607098166, 4980.259316220961, 
3093.215777029299, 131.75896178067228, -2.8146105454584784, 128.8325189742719, 
3019.6983728492232, 3987.648951811125, 1058.2886175715823, 2867.1695206176546], 
"eval_len": [95, 1000, 655, 67, 13, 63, 639, 858, 309, 699]}

 68%|██████▊   | 679995/1000000 [5:15:31<1:47:29, 49.61it/s]global step 680000, trans_decision ep_re 1375.7241264772183

{"global_step": 680000, "eval_re": [571.9574770256726, 2675.0204308074744, 
720.841780489689, 1501.256402260751, 1672.2862419849864, 1046.8632136032093, 
742.1358249448612, 1624.6383364808512, 180.61545182738234, 3021.626105347306], 
"eval_len": [181, 567, 221, 363, 372, 291, 219, 398, 83, 633]}

 69%|██████▉   | 689995/1000000 [5:20:01<1:44:12, 49.58it/s]global step 690000, trans_decision ep_re 809.0564036091466

{"global_step": 690000, "eval_re": [1850.8793380871666, 2333.8053712715696, 
1107.0181688971081, 1221.0254357496115, 142.08280298346196, 396.95971233100437, 
662.0119941067186, 248.64660073137404, 128.66166080453416, -0.5270488710835759],
"eval_len": [419, 516, 334, 322, 80, 144, 225, 112, 66, 13]}

 70%|██████▉   | 699998/1000000 [5:24:23<1:41:13, 49.39it/s]global step 700000, trans_decision ep_re 1829.5366929226159

{"global_step": 700000, "eval_re": [2186.893407389702, 2105.1983753066506, 
5012.798035414703, 2438.6249245800936, 31.856624792581517, 3197.4528784525037, 
33.602340067000576, 49.54700124540016, 1358.508410320545, 1880.884931656977], 
"eval_len": [509, 461, 1000, 541, 39, 651, 41, 48, 351, 473]}

 71%|███████   | 709999/1000000 [5:29:12<1:37:44, 49.45it/s]global step 710000, trans_decision ep_re 2437.364918416107

{"global_step": 710000, "eval_re": [4556.647961009295, 4832.747504195959, 
771.2253652142776, 1309.2887746944914, 1184.132507118198, 2579.242320795471, 
3385.4355262956988, 2110.6902074825357, 1047.0396255634587, 2597.1993917916834],
"eval_len": [1000, 1000, 254, 395, 327, 602, 741, 537, 289, 613]}

 72%|███████▏  | 719997/1000000 [5:34:02<1:34:19, 49.47it/s]global step 720000, trans_decision ep_re 3055.093426721403

{"global_step": 720000, "eval_re": [150.48171406874653, 3346.486441955482, 
3034.607969766078, 3844.6460290303794, 4838.854456308061, 4759.902684112083, 
1449.143250973555, 3909.6736324638787, 4966.102099902585, 251.03598863318385], 
"eval_len": [95, 712, 624, 837, 1000, 1000, 357, 823, 1000, 127]}

 73%|███████▎  | 729997/1000000 [5:38:26<1:30:47, 49.56it/s]global step 730000, trans_decision ep_re 1170.7283982565298

{"global_step": 730000, "eval_re": [250.68347672428496, 28.217498534546575, 
492.7140200084354, 187.1773615434026, 1515.9238698918412, 422.15177027831993, 
1910.8466507538876, 2702.174190895014, 617.0719229753423, 3580.3232209602224], 
"eval_len": [125, 43, 250, 118, 403, 194, 492, 596, 218, 776]}

 74%|███████▍  | 739995/1000000 [5:43:01<1:27:40, 49.43it/s]global step 740000, trans_decision ep_re 78.4054725521583

{"global_step": 740000, "eval_re": [44.97002791469482, 48.89443280423558, 
401.962715883495, 49.46759882618909, 47.689281043215765, 45.86611989498134, 
57.728182126877506, 1.4083955950173621, 46.77332996236782, 39.2946414705087], 
"eval_len": [44, 46, 170, 45, 46, 44, 50, 15, 45, 41]}

 75%|███████▍  | 749999/1000000 [5:47:42<1:24:47, 49.14it/s]global step 750000, trans_decision ep_re 1509.4514912629234

{"global_step": 750000, "eval_re": [2353.631236352425, 1682.9383690930715, 
318.3945886950543, 440.2167690599908, 786.6715758192223, 3211.339017480432, 
284.4381745594045, 2531.3618735825294, 2070.693231682909, 1414.8300763041932], 
"eval_len": [547, 543, 147, 157, 241, 694, 123, 591, 467, 378]}

 76%|███████▌  | 759999/1000000 [5:52:22<1:20:47, 49.51it/s]global step 760000, trans_decision ep_re 2680.315004977071

{"global_step": 760000, "eval_re": [5.868011244615863, 28.13393060674886, 
2159.5326745697785, 4652.155621764663, 1598.2780512820127, 3938.087369755336, 
4839.44013385319, 3986.90496835991, 829.9973603151839, 4764.751928019273], 
"eval_len": [18, 81, 527, 1000, 403, 836, 1000, 835, 251, 1000]}

 77%|███████▋  | 769995/1000000 [5:57:02<1:16:57, 49.81it/s]global step 770000, trans_decision ep_re 2193.1754904447293

{"global_step": 770000, "eval_re": [3247.399982274284, 5068.606178638996, 
139.27279145895412, 2833.4176665105015, 4891.642410217882, 302.49486469038504, 
1311.6825029166355, 2569.0047963186475, 621.9762238718295, 946.2574875491807], 
"eval_len": [717, 1000, 68, 640, 1000, 171, 323, 553, 215, 291]}

 78%|███████▊  | 779995/1000000 [6:01:42<1:13:41, 49.76it/s]global step 780000, trans_decision ep_re 1556.1373779529092

{"global_step": 780000, "eval_re": [287.23763842635697, 198.242541814019, 
173.47216040227576, 4841.456779602292, 21.54569819072361, 239.8473197185576, 
2918.49639645458, 45.88963385313823, 3473.978346765517, 3361.2072643016304], 
"eval_len": [110, 100, 89, 1000, 31, 104, 659, 44, 726, 754]}

 79%|███████▉  | 789995/1000000 [6:06:06<2:52:24, 20.30it/s]global step 790000, trans_decision ep_re 976.0313144110858

{"global_step": 790000, "eval_re": [4970.136032850733, 779.9513564843695, 
603.4059727741245, 118.41490141997937, 436.83226356042405, 168.57388548510465, 
328.7253611841673, 615.1430152535177, 1565.079859034844, 174.05049606359464], 
"eval_len": [1000, 234, 243, 59, 174, 84, 145, 208, 399, 98]}

 80%|███████▉  | 799999/1000000 [6:10:42<1:07:04, 49.70it/s]global step 800000, trans_decision ep_re 1906.1616857072459

{"global_step": 800000, "eval_re": [2911.9451268541643, 2963.143080877335, 
1031.6609175269793, 127.06195823977953, 3207.465735952826, 1266.7124643282764, 
2083.6114067638528, 332.97669649869846, 478.50534067255063, 4658.534129357996], 
"eval_len": [657, 639, 289, 73, 852, 353, 499, 134, 185, 1000]}

 81%|████████  | 809995/1000000 [6:15:22<1:03:52, 49.57it/s]global step 810000, trans_decision ep_re 1948.988385772078

{"global_step": 810000, "eval_re": [300.9421835289636, 1908.209546132613, 
1179.1871801958027, 1509.5835209968247, 109.98650002955699, 221.97654363001843, 
2648.070602729973, 5073.156546714061, 1739.7333845468304, 4799.0378492161335], 
"eval_len": [129, 549, 299, 359, 84, 107, 606, 1000, 480, 1000]}

 82%|████████▏ | 819997/1000000 [6:20:12<1:00:32, 49.56it/s]global step 820000, trans_decision ep_re 1362.1515519542977

{"global_step": 820000, "eval_re": [527.866469960516, 124.99388791392789, 
2856.199025421359, 136.14846577633483, 196.21933368368562, 130.68365924369039, 
122.9566933280121, 1043.80058200928, 3581.0017581854645, 4901.645644020706], 
"eval_len": [182, 56, 625, 93, 99, 63, 60, 283, 765, 1000]}

 83%|████████▎ | 829999/1000000 [6:24:39<57:42, 49.09it/s]global step 830000, trans_decision ep_re 592.8929071389628

{"global_step": 830000, "eval_re": [113.20232396614784, 2166.123353258063, 
242.29189554259037, 141.72412017508285, 2552.599956943774, 89.56527827664883, 
44.04960438700412, 234.57932789345026, 172.93634446924244, 171.85686647762398], 
"eval_len": [51, 506, 111, 61, 588, 63, 44, 103, 87, 85]}

 84%|████████▍ | 839996/1000000 [6:29:22<53:36, 49.74it/s]global step 840000, trans_decision ep_re 1671.4921573571369

{"global_step": 840000, "eval_re": [1440.518279149147, 177.89629233008947, 
4072.9390618753914, 935.9074251361136, 613.8898187360107, 398.1066169642295, 
2465.914432461855, 1835.4639539878672, 115.41182910454263, 4658.873863826122], 
"eval_len": [387, 109, 909, 292, 198, 161, 587, 446, 54, 957]}

 85%|████████▍ | 849999/1000000 [6:34:02<50:07, 49.87it/s]global step 850000, trans_decision ep_re 2624.84580120177

{"global_step": 850000, "eval_re": [2879.081288316104, 385.0991625011296, 
1949.5781098363238, -1.1854559864242478, 2848.3031685125343, 4904.124109295038, 
167.38084970615196, 3468.7098706968222, 4893.003488301776, 4754.3634208382455], 
"eval_len": [619, 149, 445, 14, 611, 1000, 84, 718, 1000, 999]}

 86%|████████▌ | 859997/1000000 [6:38:42<46:46, 49.89it/s]global step 860000, trans_decision ep_re 1007.5007747483203

{"global_step": 860000, "eval_re": [4714.204212311973, 174.63255120086936, 
193.44963008293166, 171.44415976919313, 2168.293009261328, 34.20705797213434, 
2405.7522349050932, 130.01682920320633, 40.09162203109608, 42.91644074537836], 
"eval_len": [1000, 106, 102, 113, 494, 54, 544, 147, 56, 59]}

 87%|████████▋ | 869997/1000000 [6:43:05<44:11, 49.02it/s]global step 870000, trans_decision ep_re 994.9609721171348

{"global_step": 870000, "eval_re": [715.5027355391613, 42.84714836004561, 
69.97242182511116, 74.3045522187665, 232.73715160364935, 217.35065436346858, 
4369.046451606843, 1223.1705172255217, 446.7972010139939, 2557.8808874147876], 
"eval_len": [249, 43, 59, 61, 97, 103, 923, 338, 175, 539]}

 88%|████████▊ | 879997/1000000 [6:47:38<40:17, 49.64it/s]global step 880000, trans_decision ep_re 706.0559550470136

{"global_step": 880000, "eval_re": [1217.0357940605572, 2547.8963857966505, 
893.9568422285695, 264.8186555258856, 196.03468638744116, 192.75090000486853, 
201.2043237762453, 260.036654641324, 206.67951878302446, 1080.1457892655692], 
"eval_len": [339, 543, 246, 103, 76, 76, 95, 103, 107, 291]}

 89%|████████▉ | 889995/1000000 [6:52:22<37:22, 49.05it/s]global step 890000, trans_decision ep_re 1436.550611850546

{"global_step": 890000, "eval_re": [1970.8646308342327, 1793.8037910980229, 
4300.784354671215, 146.0953861288961, 243.69417875298848, 4469.628284304504, 
930.0722954188288, 235.336648511707, 123.90875828095099, 151.31779050411444], 
"eval_len": [427, 436, 870, 82, 101, 895, 249, 100, 77, 62]}

 90%|████████▉ | 899997/1000000 [6:57:02<33:39, 49.51it/s]global step 900000, trans_decision ep_re 1693.6091714218942

{"global_step": 900000, "eval_re": [229.53608606676335, 834.6818393824485, 
1160.2677321265414, 952.2136488184616, 638.3062754586994, 4747.8618482956335, 
2091.59050956572, 3647.4976232900544, 1001.0032029370217, 1633.1329482776], 
"eval_len": [107, 250, 291, 262, 219, 1000, 483, 782, 267, 375]}

 91%|█████████ | 909995/1000000 [7:01:25<30:15, 49.56it/s]global step 910000, trans_decision ep_re 1563.999976828394

{"global_step": 910000, "eval_re": [520.7006553224223, 1795.8323409917255, 
1311.7789522756436, 4006.04550586089, 3492.4008959997627, 1059.9925798792706, 
249.8681737863541, 2338.5743066811037, 222.51875706250817, 642.2876004242607], 
"eval_len": [158, 419, 315, 837, 729, 280, 107, 518, 97, 193]}

 92%|█████████▏| 919999/1000000 [7:06:22<26:50, 49.67it/s]global step 920000, trans_decision ep_re 2292.0098701228994

{"global_step": 920000, "eval_re": [1109.823841929277, 4779.6269978799, 
1067.9033773757078, 4368.727895019932, 261.96951665726334, 4797.633832515418, 
1235.1447066133949, 4814.452559972795, 272.3654756822337, 212.4504975830699], 
"eval_len": [297, 1000, 362, 866, 108, 1000, 326, 1000, 120, 102]}

 93%|█████████▎| 929997/1000000 [7:10:45<23:26, 49.76it/s]global step 930000, trans_decision ep_re 1010.5888909222246

{"global_step": 930000, "eval_re": [1115.462563133837, 377.11167154560405, 
1683.2382114930447, 1693.1358527560017, 183.50493320932543, 288.9612919510888, 
-5.466918236185874, 403.75202300017656, 219.44005525437396, 4146.749225114978], 
"eval_len": [313, 134, 403, 398, 86, 146, 18, 129, 89, 946]}

 94%|█████████▍| 939997/1000000 [7:15:33<20:16, 49.32it/s]global step 940000, trans_decision ep_re 1298.4553863465305

{"global_step": 940000, "eval_re": [4281.447940879744, 2502.8215340518277, 
1340.434242668202, 291.4866881182725, 3169.666048865586, 570.2261854932732, 
215.30596380153136, 188.59127007288149, 387.31521722211045, 37.2587722918764], 
"eval_len": [929, 552, 381, 133, 721, 174, 96, 88, 128, 35]}

 95%|█████████▍| 949998/1000000 [7:19:55<16:47, 49.61it/s]global step 950000, trans_decision ep_re 623.9748955793209

{"global_step": 950000, "eval_re": [33.018826464743725, 261.11095814752383, 
340.9769946885216, 936.761687510925, 225.98570342381083, 129.4185877442732, 
25.706926403197535, 248.32523682622488, 1718.9517665234562, 2319.4922680605337],
"eval_len": [33, 101, 130, 243, 94, 61, 37, 170, 404, 541]}

 96%|█████████▌| 959997/1000000 [7:24:26<13:27, 49.53it/s]global step 960000, trans_decision ep_re 1677.7557733309757

{"global_step": 960000, "eval_re": [843.4441919919426, 211.3621071388095, 
395.1488051546375, 4919.84196598489, 166.4684364969398, 235.9942549242998, 
126.09281404326667, 4946.2009147592835, 2068.8730744799627, 2864.131168335726], 
"eval_len": [287, 83, 154, 1000, 77, 97, 80, 1000, 462, 604]}

 97%|█████████▋| 969997/1000000 [7:29:13<10:07, 49.41it/s]global step 970000, trans_decision ep_re 1513.6762322821164

{"global_step": 970000, "eval_re": [251.14652728300908, 214.26126813145058, 
5137.694047997964, 3616.941160807315, 1277.2970851615312, 369.22112284855825, 
342.6547265335681, 126.09940299134159, 1724.4052613125943, 2077.0417197538304], 
"eval_len": [99, 97, 1000, 758, 307, 130, 140, 66, 366, 462]}

 98%|█████████▊| 979995/1000000 [7:33:37<06:41, 49.79it/s]global step 980000, trans_decision ep_re 1289.5246184680325

{"global_step": 980000, "eval_re": [470.82987242644924, 29.960127975244458, 
240.57693087730888, 364.87615541566703, 3256.5348998901236, 469.45493492341717, 
4677.973625308368, 2993.104240931072, 187.55045614611842, 204.38494078655603], 
"eval_len": [200, 39, 92, 130, 728, 162, 1000, 613, 86, 95]}

 99%|█████████▉| 989999/1000000 [7:38:23<03:22, 49.36it/s]global step 990000, trans_decision ep_re 1802.4226369916037

{"global_step": 990000, "eval_re": [2876.8730158721237, 1161.1990199047164, 
4373.919840349275, 231.7506272916268, 1272.6462837209385, 31.006247432677277, 
5130.325870708423, 182.35887382462948, 222.7159939824271, 2541.4305968291987], 
"eval_len": [582, 296, 854, 112, 304, 33, 1000, 87, 78, 554]}

100%|█████████▉| 999995/1000000 [7:42:46<00:00, 46.72it/s]global step 1000000, trans_decision ep_re 1137.2957688211122

{"global_step": 1000000, "eval_re": [1566.3411498960843, 486.0521267509973, 
3173.7856605506654, 3108.6044537940948, 896.3085961885629, 29.08380497279444, 
1294.0052832750516, 175.08933293531675, 19.23929625563048, 624.4479835919283], 
"eval_len": [367, 177, 655, 712, 264, 33, 334, 85, 31, 194]}

100%|██████████| 1000000/1000000 [7:43:00<00:00, 36.00it/s]
