
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 4,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)'
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9995/1000000 [02:40<5:36:10, 49.08it/s]global step 10000, trans_decision ep_re 31.92286198005396

{"global_step": 10000, "eval_re": [28.16901693668065, 28.22462423085119, 
21.27579923891995, 49.05295170179222, 27.345763409243713, 33.25724599291963, 
67.00729967705945, 15.887763722172837, 17.408158628478457, 31.59999626242151], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19995/1000000 [07:40<5:31:40, 49.24it/s]global step 20000, trans_decision ep_re 1034.410337536673

{"global_step": 20000, "eval_re": [1054.3961863607792, 1066.4415696425035, 
1123.9289476352149, 928.1027770984576, 1084.1814172037687, 1069.0375368097164, 
1274.8532461535976, 1183.0923846125972, 558.5985671562964, 1001.4707426937982], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29999/1000000 [12:40<5:26:56, 49.45it/s]global step 30000, trans_decision ep_re 1600.5350612561308

{"global_step": 30000, "eval_re": [1120.239096174301, 1847.5351103169598, 
1440.0727492525027, 1869.594072884564, 2013.2227110891267, 1353.495779959722, 
1754.7435997771663, 1918.9905355158767, 2037.9572910263635, 649.4996665647252], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39999/1000000 [17:30<5:24:49, 49.26it/s]global step 40000, trans_decision ep_re 2303.804583883748

{"global_step": 40000, "eval_re": [2267.992686355956, 2154.826669900762, 
2457.2092349104137, 2411.030369135464, 2471.8123118987005, 2125.1731832379323, 
2057.4984844740115, 2550.412622695994, 2398.513989049027, 2143.576287179217], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49997/1000000 [22:30<5:17:11, 49.92it/s]global step 50000, trans_decision ep_re 2467.986378314301

{"global_step": 50000, "eval_re": [2570.3505219323283, 2093.72559162321, 
756.9168982459086, 2772.9105029434622, 2476.847629172616, 2784.9766128547226, 
2914.970925466387, 2611.8872433608335, 2704.297080375567, 2992.980777167973], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59999/1000000 [27:20<5:13:26, 49.98it/s]global step 60000, trans_decision ep_re 2445.3553861714277

{"global_step": 60000, "eval_re": [3211.369683549764, 3071.29869768526, 
3047.026621783853, 908.1986501737655, 2936.7995477836275, 2890.7266342490334, 
3271.899033433956, 967.9544452158779, 972.3105055635884, 3175.970042275551], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69997/1000000 [32:10<5:10:00, 50.00it/s]global step 70000, trans_decision ep_re 3023.803301868699

{"global_step": 70000, "eval_re": [2932.281439169721, 1167.896765081434, 
2937.9656439879636, 3534.3250087995943, 2869.4116462586853, 3401.1675588318617, 
3833.3714900031523, 2954.2783107199034, 3699.84854393445, 2907.4866119002204], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79995/1000000 [37:00<5:06:41, 50.00it/s]global step 80000, trans_decision ep_re 3092.1689876136634

{"global_step": 80000, "eval_re": [3049.6162513207555, 716.885886000117, 
3289.572083609385, 4000.5352520608317, 3246.7036090805545, 3449.8089787917233, 
3046.8278322170786, 3494.021716804864, 3186.5637905840226, 3441.154475667304], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89999/1000000 [41:50<5:02:33, 50.13it/s]global step 90000, trans_decision ep_re 2892.7261796959715

{"global_step": 90000, "eval_re": [2012.1894162312087, 691.1210789060888, 
3140.166991904856, 3656.902876267453, 3726.5789493926327, 3513.6830335105587, 
2384.940225961224, 3133.0189746649826, 3470.9714718096498, 3197.688778311061], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99999/1000000 [46:40<4:58:26, 50.26it/s]global step 100000, trans_decision ep_re 3264.780578517078

{"global_step": 100000, "eval_re": [3887.9090135656684, 3673.087243690089, 
3534.861027980863, 1099.1094575618456, 3338.068286627227, 3366.4380434874615, 
3202.7833695077843, 3875.4462610969613, 3166.599721879569, 3503.503359773308], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109995/1000000 [51:30<4:56:41, 50.00it/s]global step 110000, trans_decision ep_re 3744.110448471888

{"global_step": 110000, "eval_re": [3542.7511377344035, 3920.7785001737257, 
4309.386686068696, 3615.3548475812418, 3426.2731080165563, 3803.949847646664, 
3600.324139623838, 3601.5355666449614, 3862.6218324884117, 3758.128818740381], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119997/1000000 [56:20<4:52:41, 50.11it/s]global step 120000, trans_decision ep_re 3460.3888575571937

{"global_step": 120000, "eval_re": [4049.6394593103532, 3990.9319143089106, 
1623.1347236422675, 3843.2592903333702, 3140.1346978448637, 2815.080270165652, 
4062.655082628748, 3692.388101263397, 3767.5934653737327, 3619.071570700644], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129995/1000000 [1:01:10<4:50:16, 49.95it/s]global step 130000, trans_decision ep_re 3983.6148552519494

{"global_step": 130000, "eval_re": [2801.1097330064363, 3707.7344627558027, 
4004.1488849423117, 4153.110664910524, 4114.956253842685, 4223.024113411708, 
4135.025458128421, 4135.016787741614, 4104.780595940164, 4457.24159783983], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139997/1000000 [1:06:00<4:45:52, 50.14it/s]global step 140000, trans_decision ep_re 3313.3490758551134

{"global_step": 140000, "eval_re": [3882.4160682799443, 3954.4749583598605, 
1455.2708990308354, 3816.4653226250152, 3918.949600947471, 4120.246782917058, 
3490.095515797739, 1025.226298801694, 3698.90925711926, 3771.436054672252], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149995/1000000 [1:10:50<4:43:23, 49.99it/s]global step 150000, trans_decision ep_re 3901.3334113672363

{"global_step": 150000, "eval_re": [3970.164398987215, 1498.9000098313384, 
3628.6287156725266, 3717.430952420527, 4699.902058846335, 4556.279662291925, 
4283.180370625037, 3956.766191502395, 4436.785254818611, 4265.2964986764455], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159999/1000000 [1:15:40<4:39:27, 50.10it/s]global step 160000, trans_decision ep_re 3934.945238359191

{"global_step": 160000, "eval_re": [4308.547508465773, 3548.1198150482346, 
4496.724949651469, 2683.2579153193014, 4095.527196654883, 4267.170709124919, 
4222.647918739997, 4413.511352655323, 4119.86198789208, 3194.0830300399266], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169995/1000000 [1:20:30<4:36:56, 49.95it/s]global step 170000, trans_decision ep_re 3890.942805660748

{"global_step": 170000, "eval_re": [3570.8120780387217, 4397.752739435271, 
3790.9157033013416, 3470.6383752605216, 4483.902593860172, 3891.1111098129686, 
3307.4726261772844, 4121.807518280419, 4053.8977429451234, 3821.1175694956573], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179999/1000000 [1:25:20<4:32:30, 50.15it/s]global step 180000, trans_decision ep_re 3753.0019738879105

{"global_step": 180000, "eval_re": [3794.3276357475756, 4638.736749442371, 
3354.2037359708816, 4063.5889755690655, 4213.218164091087, 3623.7542972162455, 
3852.0011572820354, 2011.677519296988, 4311.603654646673, 3666.9078496161796], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189995/1000000 [1:30:10<7:11:59, 31.25it/s]global step 190000, trans_decision ep_re 3788.6047558186087

{"global_step": 190000, "eval_re": [3710.5406237992456, 4382.261039966665, 
3616.74623305305, 3837.294347435236, 2317.5408132829007, 4371.783446761203, 
4113.653692436511, 4272.190517715095, 3676.132871484825, 3587.9039722513576], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199997/1000000 [1:35:00<4:26:56, 49.95it/s]global step 200000, trans_decision ep_re 3983.5349041018553

{"global_step": 200000, "eval_re": [3991.5787044658355, 4479.671798076999, 
4031.290046361039, 3611.1946053432616, 3886.694340725953, 3316.520053699879, 
4536.622756267786, 3831.4143414962105, 4020.0131818591526, 4130.349212722436], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209995/1000000 [1:39:50<4:23:29, 49.97it/s]global step 210000, trans_decision ep_re 3487.789950721136

{"global_step": 210000, "eval_re": [3844.2575336642403, 1031.2307969246742, 
4430.648108856628, 4230.026035111294, 3958.8590526971707, 4217.964327725323, 
4412.375824427626, 1001.0109404564356, 3926.6198120620647, 3824.907075285909], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219997/1000000 [1:44:40<4:20:17, 49.94it/s]global step 220000, trans_decision ep_re 3476.07400527234

{"global_step": 220000, "eval_re": [3887.8171333318296, 4156.068520900515, 
3760.138535444894, 3738.8733318239783, 3827.659629261635, 4164.613029449852, 
783.9203444941846, 1730.7586644241846, 4635.723289017481, 4075.167574574841], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229997/1000000 [1:49:40<4:17:12, 49.90it/s]global step 230000, trans_decision ep_re 3362.7507039132215

{"global_step": 230000, "eval_re": [1273.8598884722155, 4197.373909413996, 
3533.057706975942, 3965.460492343483, 3858.96063084799, 3764.6114103983537, 
4150.192943905343, 1034.4800114267696, 4092.964441448205, 3756.5456038999123], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239999/1000000 [1:54:30<4:12:45, 50.12it/s]global step 240000, trans_decision ep_re 3971.001002003836

{"global_step": 240000, "eval_re": [3954.123687505943, 4358.549453126581, 
4265.7907309900465, 3467.1231307114185, 3792.606226141842, 4327.868569857669, 
3939.812830508111, 3636.6413512615504, 4090.6467871715763, 3876.8472527636213], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249995/1000000 [1:59:20<4:09:27, 50.11it/s]global step 250000, trans_decision ep_re 4217.923175926318

{"global_step": 250000, "eval_re": [3829.4482943963867, 3991.701217559473, 
4385.716054601941, 4359.870109212778, 4141.04127899486, 3932.833215093644, 
4498.544885319388, 4204.2763141789965, 4337.871507715642, 4497.928882190073], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259995/1000000 [2:04:10<4:07:21, 49.86it/s]global step 260000, trans_decision ep_re 4001.083300104754

{"global_step": 260000, "eval_re": [4712.546899907617, 2602.1731698313056, 
3806.510311915434, 4664.233758272525, 4147.348295039176, 3770.6590202069974, 
4308.825010233933, 4296.6844617676115, 4222.788058749361, 3479.0640151235775], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269995/1000000 [2:09:00<4:03:01, 50.07it/s]global step 270000, trans_decision ep_re 3811.0257097427484

{"global_step": 270000, "eval_re": [4375.776050052239, 4329.607418775756, 
4446.574176150175, 3818.021649537611, 3934.230703015931, 4445.013221471665, 
2849.962021585168, 1187.4933406578953, 4078.034431464055, 4645.544084716991], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279997/1000000 [2:13:50<3:59:43, 50.06it/s]global step 280000, trans_decision ep_re 3930.055881787782

{"global_step": 280000, "eval_re": [4631.2334236416855, 4045.6905650362883, 
4272.502247910809, 4316.896660651144, 4209.776410373644, 4673.669654002052, 
4057.822917561675, 3729.51264182182, 3829.056242673304, 1534.3980542053985], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289999/1000000 [2:18:40<3:55:49, 50.18it/s]global step 290000, trans_decision ep_re 3501.1881566838697

{"global_step": 290000, "eval_re": [4168.616380101858, 3050.5547790386686, 
3453.0370566497463, 3785.1042590853745, 3967.919318585869, 2957.492982654295, 
3917.064715535566, 1509.036546804458, 4314.811482895121, 3888.2440454877324], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299995/1000000 [2:23:30<3:52:42, 50.14it/s]global step 300000, trans_decision ep_re 3910.135957922129

{"global_step": 300000, "eval_re": [3610.766122146659, 3710.6503657240105, 
3938.325586831106, 3730.1801660285337, 4139.509736877366, 4136.792788275686, 
3976.109582368571, 3968.846000606697, 3899.5364980392173, 3990.642732323436], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309999/1000000 [2:28:20<3:49:25, 50.13it/s]global step 310000, trans_decision ep_re 3894.8342741036627

{"global_step": 310000, "eval_re": [4114.082907235228, 2366.7341191542414, 
4245.153982375934, 4081.5495077267115, 3631.771552809875, 4339.903664345133, 
3913.1847397820648, 4240.188286987594, 4037.379114873186, 3978.394865746661], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319997/1000000 [2:33:10<3:45:47, 50.19it/s]global step 320000, trans_decision ep_re 3949.313288569361

{"global_step": 320000, "eval_re": [3887.785953140296, 4391.584277685552, 
4553.57117433423, 2965.4126834567815, 1992.6469465720654, 4481.488864274034, 
4110.795229421112, 4111.532687110193, 4509.435499137286, 4488.879570562055], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329999/1000000 [2:38:10<3:42:35, 50.17it/s]global step 330000, trans_decision ep_re 3926.2476431462565

{"global_step": 330000, "eval_re": [4256.219593065901, 4061.1543451582997, 
4477.741050619179, 2015.1595141515825, 3499.5189140754314, 4613.738017578146, 
4028.3957607003326, 4040.8208843852553, 4041.0819236454563, 4228.6464280829805],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339999/1000000 [2:43:00<3:38:37, 50.31it/s]global step 340000, trans_decision ep_re 3639.7504066011484

{"global_step": 340000, "eval_re": [1021.1442822945751, 3584.9105174674323, 
4377.759625396117, 4011.861194980835, 3954.1753442724894, 3940.1816877847723, 
3922.7538885589033, 4254.074771705404, 4887.228150945397, 2443.4146026055605], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349995/1000000 [2:47:50<3:34:54, 50.41it/s]global step 350000, trans_decision ep_re 4140.09044099307

{"global_step": 350000, "eval_re": [4628.247135861487, 4318.138118197147, 
4422.519664119216, 3931.197933902461, 4561.347948292838, 4056.3255012797767, 
3761.4311485312305, 3399.5408292947154, 4451.094486573799, 3871.0616438780257], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359995/1000000 [2:52:40<3:32:34, 50.18it/s]global step 360000, trans_decision ep_re 4029.343091475621

{"global_step": 360000, "eval_re": [4378.536266122684, 4315.130286791515, 
4539.4278990374105, 3863.9290075794593, 2771.333500967617, 3912.9353176594495, 
4283.596932378618, 4305.8662513624395, 4033.188923579452, 3889.4865292775658], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369997/1000000 [2:57:30<3:29:32, 50.11it/s]global step 370000, trans_decision ep_re 4205.971176858245

{"global_step": 370000, "eval_re": [4223.251920122151, 3515.460443832951, 
4143.121765255433, 5169.033052630754, 4202.826302692987, 4169.250980494293, 
3945.5822181541903, 4263.238973195538, 4538.875965578791, 3889.0701466253486], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379997/1000000 [3:02:20<3:25:28, 50.29it/s]global step 380000, trans_decision ep_re 4265.429895508384

{"global_step": 380000, "eval_re": [4214.753679762052, 4273.354447524266, 
4529.280729952302, 3995.12295460857, 4433.276501416452, 3821.4324786866273, 
4324.755299336717, 4480.560770518726, 4551.934776878464, 4029.8273163996623], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389995/1000000 [3:07:10<3:22:39, 50.17it/s]global step 390000, trans_decision ep_re 4312.78298162429

{"global_step": 390000, "eval_re": [4529.348394028082, 4454.124527691021, 
4892.81345315891, 4145.572987952878, 4590.745218718698, 3671.534843591074, 
4554.706745473359, 4344.490027732052, 4004.8657476068256, 3939.6278702899976], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [3:12:00<3:20:45, 49.81it/s]global step 400000, trans_decision ep_re 3313.704068044374

{"global_step": 400000, "eval_re": [4149.50650609504, 1755.5111075354876, 
4323.057927935611, 4360.145834710993, 4267.037068369386, 850.727016060611, 
4284.895571941497, 4168.422735154243, 4184.155316347081, 793.5815962937868], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409999/1000000 [3:17:00<3:16:56, 49.93it/s]global step 410000, trans_decision ep_re 3887.441001228903

{"global_step": 410000, "eval_re": [4188.027350403612, 3656.250245071786, 
3360.8923406296713, 3955.097219384443, 4166.08992321198, 3822.3331066203914, 
4132.008781184843, 3988.257380987717, 3923.0995482874246, 3682.354116507161], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419997/1000000 [3:21:50<3:12:36, 50.19it/s]global step 420000, trans_decision ep_re 3993.9160348150776

{"global_step": 420000, "eval_re": [3621.547442837519, 4441.255441680934, 
4126.604180120983, 4060.724215753178, 3454.9296577593987, 4150.5170271086145, 
3959.4708901663066, 4436.663947717575, 3899.4973580319115, 3787.950186974355], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429995/1000000 [3:26:40<3:09:54, 50.03it/s]global step 430000, trans_decision ep_re 4030.4058037429627

{"global_step": 430000, "eval_re": [4909.721328644893, 3583.9836902779884, 
4066.8199685577565, 2741.835966973255, 3280.539336811694, 3964.519773256793, 
4182.617156677435, 4782.272603756324, 4094.56365598103, 4697.184556492455], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439995/1000000 [3:31:30<3:06:22, 50.08it/s]global step 440000, trans_decision ep_re 4359.848043573369

{"global_step": 440000, "eval_re": [4670.380516890034, 3863.670682202602, 
4101.668099415091, 4382.111307234512, 4232.692053052396, 3910.344188033297, 
4383.1021535771015, 4660.696742536064, 4814.615530688579, 4579.199162104017], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449999/1000000 [3:36:20<3:02:21, 50.27it/s]global step 450000, trans_decision ep_re 4105.971456789247

{"global_step": 450000, "eval_re": [3308.888457845957, 3012.256706495958, 
4169.156202403653, 4444.9041513783395, 4571.971796935329, 3996.5171338463488, 
4530.647000445538, 4323.036936624741, 4372.974650007888, 4329.36153190872], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459995/1000000 [3:41:10<2:58:47, 50.34it/s]global step 460000, trans_decision ep_re 3965.7522721451737

{"global_step": 460000, "eval_re": [3847.3553214167, 4393.338931068965, 
4092.504481956818, 3684.8299383008693, 4088.3216850053823, 4331.627957742997, 
3698.807200757511, 3896.399668239036, 3911.5621982241, 3712.7753387393614], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469997/1000000 [3:46:00<2:55:40, 50.28it/s]global step 470000, trans_decision ep_re 3995.982690948024

{"global_step": 470000, "eval_re": [4159.763958488341, 4749.610022134573, 
4110.906826549517, 3990.559730967099, 3841.663247930172, 4274.021576445102, 
1952.5564050904268, 4232.964009653451, 4557.681132809672, 4090.0999994118815], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479999/1000000 [3:50:50<2:52:45, 50.17it/s]global step 480000, trans_decision ep_re 3960.238973070186

{"global_step": 480000, "eval_re": [4125.085862581435, 1423.246925804177, 
4253.654000403254, 4236.075137589408, 4212.353354666456, 4537.3967673701945, 
3035.1136161108666, 4653.031535870064, 4531.565429936727, 4594.867100369291], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489997/1000000 [3:55:40<2:48:54, 50.32it/s]global step 490000, trans_decision ep_re 3876.3406982505157

{"global_step": 490000, "eval_re": [1832.5281899960537, 3797.842972461777, 
3977.615313588961, 4579.975829606649, 4044.655501162581, 4308.182640231712, 
4259.120692423895, 3520.584138507932, 3882.005969641579, 4560.895734884023], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499999/1000000 [4:00:30<2:46:19, 50.10it/s]global step 500000, trans_decision ep_re 3872.4726314438312

{"global_step": 500000, "eval_re": [4648.248737893064, 3841.9002787883105, 
3833.6052182972353, 4590.0081560711515, 4215.53054203689, 1473.841114296958, 
4391.341704353079, 4043.198018580907, 3627.628177049948, 4059.424367070766], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509999/1000000 [4:05:20<2:42:30, 50.26it/s]global step 510000, trans_decision ep_re 3662.632984001538

{"global_step": 510000, "eval_re": [4299.880177272867, 4449.15767523263, 
4801.776602956626, 387.4130289207851, 1591.4452013738596, 4261.931277534411, 
4253.553718783311, 4272.6937202516265, 4123.522578722457, 4184.955858966807], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519999/1000000 [4:10:10<2:39:47, 50.07it/s]global step 520000, trans_decision ep_re 4260.841874028853

{"global_step": 520000, "eval_re": [4459.116958556422, 3971.52623000116, 
4414.413075098943, 4403.785255316181, 4467.607145151953, 4389.160394283923, 
2945.8269975888406, 3888.136342762227, 5099.04503551738, 4569.801306011506], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529997/1000000 [4:15:00<2:37:22, 49.78it/s]global step 530000, trans_decision ep_re 4381.253504735926

{"global_step": 530000, "eval_re": [4713.302860043648, 4691.497714068585, 
4395.185544539315, 4203.803509282137, 3254.832359347305, 4088.6179050161104, 
4601.80309632552, 4755.378313563725, 4442.951068658962, 4665.162676513951], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539999/1000000 [4:19:50<2:33:08, 50.06it/s]global step 540000, trans_decision ep_re 4150.324421723048

{"global_step": 540000, "eval_re": [4146.068560192972, 4081.0065815241824, 
4157.986857934775, 3671.4357113757233, 4491.626093108917, 4346.28607930358, 
4121.4243852118625, 3529.1228879831556, 4560.714564190109, 4397.5724964052], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549999/1000000 [4:24:40<2:30:20, 49.89it/s]global step 550000, trans_decision ep_re 4399.86251278965

{"global_step": 550000, "eval_re": [4304.559593129297, 4696.252089159487, 
4461.000789420287, 4600.400658457371, 4633.343756363428, 4095.420764789196, 
3793.3045604280187, 4624.690602662526, 4168.598643096823, 4621.0536703900725], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559995/1000000 [4:29:30<2:26:24, 50.09it/s]global step 560000, trans_decision ep_re 4369.925115825529

{"global_step": 560000, "eval_re": [4516.445721768096, 4872.297391756292, 
4914.266894836607, 4176.150835625579, 5037.147260965696, 4373.9531975674545, 
4269.140997084634, 4087.555501418516, 3621.8955522214133, 3830.3978050110036], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569997/1000000 [4:34:20<2:22:51, 50.17it/s]global step 570000, trans_decision ep_re 4473.724424284029

{"global_step": 570000, "eval_re": [4628.647550421798, 4596.880203197204, 
4727.1080784077885, 5167.258850272273, 2580.292062007117, 4535.320763794351, 
4811.38570933129, 4108.538929909677, 4785.234966009904, 4796.577129488884], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579995/1000000 [4:39:20<2:19:27, 50.19it/s]global step 580000, trans_decision ep_re 4067.0586911927944

{"global_step": 580000, "eval_re": [4280.160921197664, 3941.779338244462, 
3912.4683988268544, 4900.028825423617, 3580.4446754151923, 4364.858859325508, 
3703.3267138370793, 3968.8300575631156, 4030.975670430533, 3987.713451663917], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589995/1000000 [4:44:10<2:15:51, 50.30it/s]global step 590000, trans_decision ep_re 4343.506848991955

{"global_step": 590000, "eval_re": [4173.3645620623765, 2733.748279370134, 
4518.279551139344, 4053.0225096622416, 4622.817233569892, 4675.266649214337, 
4520.172702288233, 4934.406421806905, 4849.568669663263, 4354.421911142827], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599997/1000000 [4:49:00<2:13:04, 50.10it/s]global step 600000, trans_decision ep_re 4374.30253602758

{"global_step": 600000, "eval_re": [4696.528968925603, 4975.166268378458, 
4359.173336244362, 4048.4242782443785, 4490.561550274459, 3060.4748542021935, 
4851.018087885649, 4323.209267239769, 4587.302446834184, 4351.166302046737], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609997/1000000 [4:53:50<2:09:20, 50.25it/s]global step 610000, trans_decision ep_re 4072.708159180751

{"global_step": 610000, "eval_re": [4245.938585469139, 4190.602983402243, 
4125.027984865027, 4166.660719781476, 3996.2397763999716, 4332.986966115112, 
3680.7559923814306, 3824.9885795046, 4015.6362046491313, 4148.243799239385], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619997/1000000 [4:58:40<2:06:38, 50.01it/s]global step 620000, trans_decision ep_re 3824.767194075611

{"global_step": 620000, "eval_re": [4295.416158218253, 1765.8691528259963, 
3993.379019288327, 4023.5909398371127, 4408.248698155664, 4213.57232086468, 
4159.7768449285595, 3237.8233470128607, 3981.2877237093367, 4168.707735915325], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629995/1000000 [5:03:30<2:03:17, 50.02it/s]global step 630000, trans_decision ep_re 4319.2841651283925

{"global_step": 630000, "eval_re": [4417.107120731254, 4127.321614971162, 
4623.127675027667, 4404.577307438498, 4298.1944261640665, 4297.090748497919, 
4607.866333963745, 3752.946871683268, 4380.22299827491, 4284.38655453143], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639995/1000000 [5:08:20<2:00:06, 49.96it/s]global step 640000, trans_decision ep_re 3998.3469792472374

{"global_step": 640000, "eval_re": [4055.618118592322, 2136.6082918421507, 
4382.407275554491, 4463.696997204989, 3814.7798204584337, 3786.467789929781, 
4444.690815117918, 4390.936027020535, 4320.871273194372, 4187.393383557383], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649995/1000000 [5:13:10<1:55:44, 50.40it/s]global step 650000, trans_decision ep_re 3901.0566998744316

{"global_step": 650000, "eval_re": [4605.845078590656, 4353.6779833545, 
3897.781636627888, 3765.342705232889, 4138.954918095829, 4175.593896559375, 
4191.4156212444495, 4578.343193811706, 670.7026540882487, 4632.909311138768], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659997/1000000 [5:18:10<1:53:20, 50.00it/s]global step 660000, trans_decision ep_re 4338.3883797092

{"global_step": 660000, "eval_re": [4264.387561890077, 4585.868147002185, 
2581.670332803545, 4733.1950857553475, 4284.749500472444, 4958.3919758267875, 
4797.507181548244, 4409.477430745158, 4850.942695997184, 3917.693885051036], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669999/1000000 [5:23:00<1:49:37, 50.17it/s]global step 670000, trans_decision ep_re 4214.789762637657

{"global_step": 670000, "eval_re": [4291.3055311417265, 4180.915249022223, 
4468.5587665751855, 4709.712547371331, 4203.804499393429, 5307.529167289468, 
2175.1421351010954, 4785.588054281138, 4217.768614231585, 3807.57306196939], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679995/1000000 [5:27:50<1:46:29, 50.09it/s]global step 680000, trans_decision ep_re 3754.8802058163033

{"global_step": 680000, "eval_re": [4131.841465730546, 3999.7519829027774, 
628.6616840636246, 4229.267163425091, 4359.752369211929, 4606.605037093392, 
5014.578227571281, 4513.323328228331, 4820.00117637762, 1245.019623558437], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689999/1000000 [5:32:40<1:43:14, 50.04it/s]global step 690000, trans_decision ep_re 4406.708623845756

{"global_step": 690000, "eval_re": [4301.613515497284, 4474.662262185392, 
4654.459574330012, 4037.9074423959873, 4804.547323580177, 4493.113391691395, 
4101.190783412016, 4096.277464893342, 4571.3105123796095, 4532.003968092334], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699995/1000000 [5:37:30<1:39:32, 50.23it/s]global step 700000, trans_decision ep_re 4231.399590274437

{"global_step": 700000, "eval_re": [4228.861130306402, 4995.950720569361, 
4632.144897443479, 3565.387491262367, 4649.133103394596, 4138.946773233956, 
4439.978724071026, 4448.213444161759, 4254.884349646927, 2960.495268654493], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709999/1000000 [5:42:20<1:36:50, 49.91it/s]global step 710000, trans_decision ep_re 4594.476156887262

{"global_step": 710000, "eval_re": [4806.201121871113, 4821.604227568486, 
4286.472251033453, 4537.370353927651, 4377.4019360350885, 4743.541574277292, 
4652.303846736246, 5020.905394287421, 4469.217379975868, 4229.7434831599985], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719999/1000000 [5:47:10<1:33:11, 50.08it/s]global step 720000, trans_decision ep_re 4300.04388326283

{"global_step": 720000, "eval_re": [4014.1943308699892, 4559.0022125262385, 
4803.713424524305, 4223.52919892547, 4022.2727118821617, 3652.909068462348, 
4360.3318854653435, 4034.159428862463, 4721.869132248747, 4608.457438861234], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729997/1000000 [5:52:00<1:29:04, 50.52it/s]global step 730000, trans_decision ep_re 4120.089658011357

{"global_step": 730000, "eval_re": [4240.5243452011155, 4450.1943933929, 
4550.897113038709, 4950.104267367445, 5058.785745877457, 1537.3619667516143, 
4289.285016015808, 3819.8157677300474, 4081.469703332653, 4222.458261405817], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739999/1000000 [5:56:51<1:26:46, 49.94it/s]global step 740000, trans_decision ep_re 4181.026956547721

{"global_step": 740000, "eval_re": [4751.407949104913, 3077.464155700095, 
4456.6049908962605, 3976.9060874688385, 4530.530984695107, 4768.351329488455, 
3998.024066428598, 4739.870536584699, 2788.720361702278, 4722.389103407965], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749997/1000000 [6:01:41<1:23:06, 50.13it/s]global step 750000, trans_decision ep_re 3725.1263956209596

{"global_step": 750000, "eval_re": [4505.499885017287, 4148.661032369646, 
4558.981405748917, 1152.344205382776, 4345.740509856833, 4820.361970032349, 
3905.3333779786967, 329.946831726235, 4914.132860252382, 4570.261877844472], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759995/1000000 [6:06:31<1:19:41, 50.19it/s]global step 760000, trans_decision ep_re 4319.2470277437715

{"global_step": 760000, "eval_re": [4802.422337714313, 1688.6352411442192, 
4433.568390318857, 4806.9852334856805, 4857.85729897855, 4072.644260285734, 
4727.270916704673, 4009.7602387316665, 4708.644992884019, 5084.681367189995], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769995/1000000 [6:11:21<1:16:28, 50.12it/s]global step 770000, trans_decision ep_re 3810.0529378385654

{"global_step": 770000, "eval_re": [4835.556898730203, 4386.592189304766, 
4448.825966950087, 2775.45678718084, 4318.8242588064695, 4869.370403628391, 
4907.676560358443, 542.0630262963695, 2124.012343266818, 4892.150943863264], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779997/1000000 [6:16:11<1:12:47, 50.37it/s]global step 780000, trans_decision ep_re 4183.4877472171775

{"global_step": 780000, "eval_re": [4356.426775437838, 4172.803020405672, 
4479.805702276537, 3161.5654133966214, 4041.9220989590553, 4434.989206661566, 
4014.990830983722, 4453.425165023576, 4251.626604358936, 4467.322654668248], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789995/1000000 [6:21:01<1:09:37, 50.27it/s]global step 790000, trans_decision ep_re 3860.1866265582803

{"global_step": 790000, "eval_re": [3507.803586867198, 4761.94826337426, 
1357.6659034568136, 5114.800040503559, 3733.312991539275, 4725.900028232866, 
4735.162099211141, 1486.756770523599, 4700.975335017171, 4477.54124685691], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799997/1000000 [6:25:51<1:06:41, 49.98it/s]global step 800000, trans_decision ep_re 4732.442961219842

{"global_step": 800000, "eval_re": [4132.40567313944, 4162.820551602194, 
4993.327618641038, 4975.390196178824, 4289.247797090682, 5019.706368045962, 
5110.690921795524, 4757.5882566689515, 5303.949921625595, 4579.302307410202], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809997/1000000 [6:30:42<1:03:13, 50.09it/s]global step 810000, trans_decision ep_re 4567.385404874114

{"global_step": 810000, "eval_re": [4598.8501968850705, 4654.115369912327, 
5330.138377660734, 4453.195980690584, 4370.46218907063, 4561.054225633192, 
4467.846797738312, 4456.4456029512, 4145.604743270862, 4636.1405649282315], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819999/1000000 [6:35:32<59:49, 50.15it/s]global step 820000, trans_decision ep_re 4574.108876506142

{"global_step": 820000, "eval_re": [4367.336918335801, 4583.978740579877, 
4834.181384989787, 5059.9374139973725, 3975.9870204770154, 4638.589577052302, 
5143.085017415588, 4109.485076788217, 4317.682757081652, 4710.824858343805], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829995/1000000 [6:40:22<56:38, 50.02it/s]global step 830000, trans_decision ep_re 4074.761476960596

{"global_step": 830000, "eval_re": [4091.8823839903093, 4639.053182287362, 
4699.066212806031, 4528.527605513617, 4478.1196181330415, 4540.535570722353, 
1283.1666801821152, 4281.638458681354, 4302.654976142869, 3902.970081146907], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839999/1000000 [6:45:12<53:07, 50.20it/s]global step 840000, trans_decision ep_re 4305.627924422165

{"global_step": 840000, "eval_re": [1025.4318826373803, 4810.388430507915, 
4567.924719017257, 4681.415684526793, 4918.387514993456, 4137.260109217668, 
4531.677406714286, 5027.714951558351, 4721.393657312217, 4634.684887736331], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849997/1000000 [6:50:02<50:10, 49.83it/s]global step 850000, trans_decision ep_re 4152.024696427391

{"global_step": 850000, "eval_re": [4315.11675884916, 3898.084066221546, 
4663.083041068413, 2628.2554175928676, 4205.948755346215, 4194.623888758167, 
4688.873814173698, 3845.3961587512663, 4058.0463657454507, 5022.818697767119], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859997/1000000 [6:54:52<46:21, 50.33it/s]global step 860000, trans_decision ep_re 4379.832047270736

{"global_step": 860000, "eval_re": [5077.900826717192, 3152.43433240858, 
4401.8125992194055, 4856.349809683568, 5100.017443121494, 4524.589500987907, 
4070.8759345304943, 4615.292147145045, 3878.925717944282, 4120.122160949402], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869999/1000000 [6:59:42<43:19, 50.02it/s]global step 870000, trans_decision ep_re 4363.96663236819

{"global_step": 870000, "eval_re": [4372.908872862075, 2024.6007969296109, 
4357.095708552138, 4408.43780248824, 4435.39449289319, 5068.410286807546, 
4626.851292943505, 5278.887346507009, 4305.205708223233, 4761.874015475352], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879995/1000000 [7:04:32<40:00, 50.00it/s]global step 880000, trans_decision ep_re 4445.599491744636

{"global_step": 880000, "eval_re": [4884.473783419524, 4621.589170977891, 
4852.653340586052, 4923.572029055394, 4645.249344062074, 4485.836612207648, 
4717.1735872787285, 4170.92216611502, 4411.809720575291, 2742.715163168745], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889995/1000000 [7:09:22<36:38, 50.04it/s]global step 890000, trans_decision ep_re 3656.7927690842757

{"global_step": 890000, "eval_re": [2734.125245642269, 291.61381469071256, 
2052.921326475635, 4434.712733213677, 4047.8767258881294, 4603.879692090304, 
4444.455479178884, 5029.781748302184, 4544.448531917911, 4384.112393443051], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899995/1000000 [7:14:12<33:22, 49.94it/s]global step 900000, trans_decision ep_re 4051.6614102790127

{"global_step": 900000, "eval_re": [4864.268582278281, 4806.233589381283, 
187.65051589435052, 4880.781258684359, 4073.8937859217463, 4593.422999344988, 
3666.4707234294833, 4261.586520875201, 4840.070226667209, 4342.235900313224], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909997/1000000 [7:19:02<30:18, 49.49it/s]global step 910000, trans_decision ep_re 4445.216937410377

{"global_step": 910000, "eval_re": [4559.758119710588, 4692.623080317461, 
4399.176996839992, 4412.423158571566, 3651.418756842868, 4422.16692288243, 
4912.0179638360305, 4590.333884336222, 4561.467615849707, 4250.782874916901], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919997/1000000 [7:23:52<26:34, 50.18it/s]global step 920000, trans_decision ep_re 4697.857552460491

{"global_step": 920000, "eval_re": [5089.506979791282, 4852.4065099586505, 
5021.441721058306, 4574.687335538761, 4010.7495262067005, 4834.724732643845, 
4997.25616086807, 3514.798547746639, 4999.408057251158, 5083.595953541509], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929997/1000000 [7:28:42<23:24, 49.85it/s]global step 930000, trans_decision ep_re 4440.436081540996

{"global_step": 930000, "eval_re": [3844.4682213599435, 4389.398566579226, 
3242.5400472328456, 4175.747878223701, 4380.66145063617, 5021.159435760753, 
5147.578602251701, 4742.768985511685, 4567.500492459154, 4892.537135394784], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939999/1000000 [7:33:43<19:55, 50.20it/s]global step 940000, trans_decision ep_re 4192.955405049173

{"global_step": 940000, "eval_re": [2760.2602049358293, 5244.182576988613, 
5046.967524227479, 4892.498447764446, 4555.706116669805, 4384.859047709538, 
5188.6316588026275, 4889.225577823133, 3054.041195742646, 1913.1816998276167], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949995/1000000 [7:38:33<16:39, 50.03it/s]global step 950000, trans_decision ep_re 4577.622759025945

{"global_step": 950000, "eval_re": [5045.933960812529, 4908.449638482487, 
4483.024233670575, 5111.9867745117235, 4352.992627168701, 3258.136424484692, 
4331.293156580086, 4763.119441322315, 4721.846871444407, 4799.444461781934], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959997/1000000 [7:43:23<13:15, 50.28it/s]global step 960000, trans_decision ep_re 4507.7747849703

{"global_step": 960000, "eval_re": [3789.693409010928, 4726.55677018757, 
4593.471195517684, 4548.876911108615, 4807.459450093035, 4496.038967355472, 
4855.701788240397, 4802.032153038993, 4716.037818186698, 3741.879386963611], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969999/1000000 [7:48:03<10:00, 49.97it/s]global step 970000, trans_decision ep_re 4857.651957420775

{"global_step": 970000, "eval_re": [4897.368346046084, 4957.3550511047715, 
5188.641908735257, 5549.379195877667, 4602.00335922277, 4243.215190405823, 
4972.208605978466, 4829.534803816105, 4677.0746079746095, 4659.738505046195], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979995/1000000 [7:53:03<06:40, 50.01it/s]global step 980000, trans_decision ep_re 4714.418350997474

{"global_step": 980000, "eval_re": [5152.183948215698, 5273.269643781043, 
4538.755610804317, 4765.380668713537, 4627.613727586627, 4481.525369179596, 
4843.119142393063, 5387.40400805906, 4430.787860085822, 3644.1435311559667], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989995/1000000 [7:57:53<03:19, 50.17it/s]global step 990000, trans_decision ep_re 4197.474992905705

{"global_step": 990000, "eval_re": [4487.988017041627, 4311.169845466455, 
4192.768320670618, 3010.849123129153, 4304.535328300733, 4696.256220682749, 
4107.733444184498, 4169.030016361284, 4440.680219137439, 4253.739394082504], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999999/1000000 [8:02:43<00:00, 49.97it/s]global step 1000000, trans_decision ep_re 4466.5207770418765

{"global_step": 1000000, "eval_re": [4742.291972965747, 4269.592258894166, 
4583.926551995572, 4118.110571876208, 4837.552639661655, 4076.30643013565, 
3855.920543961011, 4792.196303743205, 4896.597060301946, 4492.713436883606], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [8:02:56<00:00, 34.51it/s]
