
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 4,
    'delayspec': 'ExtremeSparseL4U32::markov(4, 32, [[249, 1], [1, 31]])'
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9997/1000000 [02:50<5:30:38, 49.90it/s]global step 10000, trans_decision ep_re -40.939409709457536

{"global_step": 10000, "eval_re": [-47.464800130708504, 3.207213406657277, 
-24.059889735455886, -173.94801641271346, 15.511456320392103, 
-24.705002865212293, -109.89267747090466, -28.692074439454615, 
54.618353993404625, -73.96865976057994], "eval_len": [1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19995/1000000 [08:20<5:30:16, 49.45it/s]global step 20000, trans_decision ep_re 967.3322090261961

{"global_step": 20000, "eval_re": [589.446152253867, 2061.6040896281224, 
327.41577222845143, 1392.1051690575548, 477.9547405341746, 53.86302675600796, 
157.0835104482018, 1592.331955778849, 1004.7072628992066, 2016.8104106775243], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29996/1000000 [14:00<5:47:27, 46.53it/s]global step 30000, trans_decision ep_re 2038.7847536413028

{"global_step": 30000, "eval_re": [2741.6786800350633, 1761.237527400702, 
1916.5259231966108, 1240.9490082298573, 2536.1945046627297, 104.35492255843332, 
2745.292976630423, 2658.146207219559, 1521.3506119215172, 3162.1171745581355], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39995/1000000 [19:20<5:27:19, 48.88it/s]global step 40000, trans_decision ep_re 2743.8706485999746

{"global_step": 40000, "eval_re": [2324.8909852911343, 3164.9125121692286, 
2364.7998019508705, 2641.6636911513624, 3314.5814892343305, 3452.625822144889, 
3013.597998385262, 861.1890251844272, 3062.5258788974907, 3237.919281590757], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49998/1000000 [24:50<5:33:26, 47.49it/s]global step 50000, trans_decision ep_re 3233.2159336136488

{"global_step": 50000, "eval_re": [2379.8131349886635, 3136.9052632850953, 
3722.436597775973, 3273.102436912909, 3351.810853958523, 3437.3396692343113, 
3523.24910536194, 3384.8251195622215, 2887.748422972164, 3234.928732084688], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59997/1000000 [30:30<5:08:19, 50.81it/s]global step 60000, trans_decision ep_re 3435.473918435428

{"global_step": 60000, "eval_re": [3916.3480384044365, 3323.5945715756125, 
3305.232078857353, 3791.386089201288, 3636.3209198973245, 3434.1798269440724, 
3827.448066096946, 3636.2297340552786, 2673.2159295363635, 2810.783929785603], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69997/1000000 [36:00<5:05:08, 50.80it/s]global step 70000, trans_decision ep_re 3997.8795900251107

{"global_step": 70000, "eval_re": [3367.0217696058467, 4283.648940237437, 
3733.554308163677, 3478.046165668026, 4361.969841065935, 4729.941440690384, 
4390.198897978381, 3496.4073081861998, 3958.246392854947, 4179.760835800268], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79999/1000000 [41:30<5:00:17, 51.06it/s]global step 80000, trans_decision ep_re 4304.777545451509

{"global_step": 80000, "eval_re": [3721.4088894237957, 4530.215610963788, 
4536.2174418965005, 4535.193576842259, 4232.904759684099, 4529.8450955176395, 
4310.428342121691, 4030.5924718446736, 4171.196439856905, 4449.772826363734], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89997/1000000 [47:10<6:42:54, 37.64it/s]global step 90000, trans_decision ep_re 3734.1241706500123

{"global_step": 90000, "eval_re": [3524.784600848598, 3320.5401861632495, 
3315.312885747884, 3429.6214740880473, 3572.674318398725, 3496.431685133027, 
4002.9489946138997, 4432.920315827655, 4298.922221870929, 3947.085023808102], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99997/1000000 [52:40<7:01:52, 35.56it/s]global step 100000, trans_decision ep_re 4065.064677450076

{"global_step": 100000, "eval_re": [4654.693908496391, 3879.4631301188806, 
4144.362039796163, 3121.793824043675, 5013.389659442737, 1199.3500343921403, 
4747.412070355937, 4748.221376386224, 4346.437647932512, 4795.523083536101], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109998/1000000 [58:20<6:39:12, 37.16it/s]global step 110000, trans_decision ep_re 4188.8830923434025

{"global_step": 110000, "eval_re": [3911.5435161452147, 4792.39625537416, 
4232.543687683072, 2909.132374637718, 4330.122417949172, 4003.5526918787677, 
3236.7554039720003, 4983.955208687275, 5078.171964803774, 4410.657402302873], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119999/1000000 [1:03:50<4:53:26, 49.98it/s]global step 120000, trans_decision ep_re 4107.659946224565

{"global_step": 120000, "eval_re": [4441.280847397109, 4191.2612999633575, 
4321.210845406539, 4196.389873216369, 3659.764915090971, 4402.995748929812, 
4482.3058159732955, 4034.315366502315, 3642.471682853617, 3704.603066912273], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129997/1000000 [1:09:30<4:45:43, 50.75it/s]global step 130000, trans_decision ep_re 3947.4342678358385

{"global_step": 130000, "eval_re": [4696.050833683473, 4398.583604678346, 
3527.374694266926, 3117.9183684378177, 3894.2061678459745, 3423.8594678055974, 
4031.5518755701614, 3604.966088809065, 4176.976259204795, 4602.855318056232], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139999/1000000 [1:15:00<6:47:04, 35.21it/s]global step 140000, trans_decision ep_re 3933.9634954949215

{"global_step": 140000, "eval_re": [2599.578944135373, 3291.7523944811605, 
4776.882869058298, 3320.1228523179852, 4417.563181458378, 3646.410522810326, 
4342.096391117807, 4215.407544397998, 4641.392889426622, 4088.4273657452713], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149996/1000000 [1:20:20<6:15:27, 37.73it/s]global step 150000, trans_decision ep_re 3606.7446158195226

{"global_step": 150000, "eval_re": [4223.98692902721, 3221.7603753160774, 
3183.4483773002976, 4589.47837662829, 4711.637607408792, 4256.242531909914, 
3929.9598116021475, 3847.4415239947966, 1233.106568432081, 2870.384056575624], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159995/1000000 [1:26:00<5:05:49, 45.78it/s]global step 160000, trans_decision ep_re 4407.771941004999

{"global_step": 160000, "eval_re": [4715.15703632629, 4323.544165403434, 
5209.732636564224, 4930.932495781316, 4429.871400118659, 4103.453869100989, 
4078.518915249512, 4176.369269514019, 4546.054637071716, 3564.0849849198285], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169999/1000000 [1:31:10<4:33:43, 50.54it/s]global step 170000, trans_decision ep_re 3792.5933994023603

{"global_step": 170000, "eval_re": [4458.317445938682, 4740.086673462288, 
4273.80239841087, 3662.430971591144, 4287.640123315899, 684.921290522083, 
4205.394935989337, 3752.302588382021, 3331.616678700431, 4529.420887710849], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179999/1000000 [1:36:40<4:33:28, 49.97it/s]global step 180000, trans_decision ep_re 4495.661890939308

{"global_step": 180000, "eval_re": [3968.0948211294203, 3198.7454856626246, 
4854.281445339616, 4861.962328518626, 4537.530566712638, 4412.723387926509, 
5221.293164589821, 4638.50263784123, 4553.020133497871, 4710.464938174732], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189997/1000000 [1:42:11<4:26:35, 50.64it/s]global step 190000, trans_decision ep_re 4254.520090503598

{"global_step": 190000, "eval_re": [4516.324001939124, 5122.716934181666, 
3383.8905175507175, 4353.910786371502, 3711.636806225674, 4654.10231523418, 
4200.51594082961, 4376.275151224905, 3642.7251738008245, 4583.103277677782], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199999/1000000 [1:47:31<4:25:43, 50.18it/s]global step 200000, trans_decision ep_re 4257.42255390269

{"global_step": 200000, "eval_re": [5016.6668639502395, 4303.583871473704, 
4158.023927846097, 3898.4917858754525, 4422.991169541242, 3606.0033732310485, 
4574.22691180628, 4476.617425919077, 4223.868280744036, 3893.7519286397246], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209995/1000000 [1:53:01<4:40:14, 46.98it/s]global step 210000, trans_decision ep_re 4538.907092204072

{"global_step": 210000, "eval_re": [2992.8496695441395, 5001.364351983926, 
4973.751618997957, 5030.235207307934, 5203.190398963578, 4879.3395870894055, 
4019.3031615822565, 4405.01604345213, 4316.296468200021, 4567.724414919371], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219999/1000000 [1:58:31<4:16:04, 50.77it/s]global step 220000, trans_decision ep_re 3634.2102373452776

{"global_step": 220000, "eval_re": [2471.0723512971254, 4624.6113468813155, 
2939.2907733729094, 3542.507190891885, 4022.638846094285, 3276.877802055315, 
3270.1949793452795, 4268.490384428856, 4106.380806335617, 3820.037892750187], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229998/1000000 [2:03:51<5:40:09, 37.73it/s]global step 230000, trans_decision ep_re 4195.576627446026

{"global_step": 230000, "eval_re": [3801.4402824200965, 4235.843965752177, 
4727.357087677552, 4400.617418518171, 4358.015570823466, 4547.427818666018, 
2951.659312158942, 4241.797371077326, 4575.223585804534, 4116.383861561973], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239997/1000000 [2:09:11<6:02:38, 34.93it/s]global step 240000, trans_decision ep_re 4135.896565141062

{"global_step": 240000, "eval_re": [3575.2632988117202, 4273.823137851471, 
4376.672970050074, 4377.035745197121, 3250.800720485121, 4020.849097753528, 
3898.9191908128837, 5029.3308006790085, 4241.218504310269, 4315.052185459426], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249999/1000000 [2:14:41<4:24:48, 47.20it/s]global step 250000, trans_decision ep_re 4201.012416461415

{"global_step": 250000, "eval_re": [4834.035361079647, 3924.3696197642303, 
3917.759605542948, 4565.280120438187, 3769.4133480217674, 4017.885643957395, 
4562.683514322841, 3733.394208215058, 4415.381069114802, 4269.9216741572745], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259998/1000000 [2:19:41<4:15:20, 48.30it/s]global step 260000, trans_decision ep_re 4359.006428253922

{"global_step": 260000, "eval_re": [4174.98146735128, 3996.542944124881, 
3954.648224408624, 4857.559682304327, 4523.318868417695, 4713.938823895229, 
4613.968185172985, 4245.801966091283, 3939.5331981905174, 4569.770922582407], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269998/1000000 [2:24:52<4:21:27, 46.53it/s]global step 270000, trans_decision ep_re 4297.755649247176

{"global_step": 270000, "eval_re": [3846.211269998957, 5163.61876222558, 
4332.470749136413, 5323.205556131408, 4599.363861036763, 4670.085135401434, 
3521.650614117192, 4600.2153857972235, 3371.186390286354, 3549.5487683404267], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279997/1000000 [2:30:02<3:58:24, 50.33it/s]global step 280000, trans_decision ep_re 3945.9434035764084

{"global_step": 280000, "eval_re": [3269.216449303766, 5239.418319895023, 
5046.980684967031, 4284.95388589075, 4054.742440311486, 3890.5192632951703, 
3648.01918093437, 4643.571897767272, 4543.515802323132, 838.4961110760744], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289999/1000000 [2:34:52<3:54:21, 50.49it/s]global step 290000, trans_decision ep_re 4556.837928344912

{"global_step": 290000, "eval_re": [4941.172328144885, 4927.9048857267335, 
4537.278295717667, 4677.677303143623, 4668.733135306915, 4066.808799738734, 
4644.22403555178, 3164.2686769738366, 4892.15228995902, 5048.159533185925], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299995/1000000 [2:39:52<4:14:13, 45.89it/s]global step 300000, trans_decision ep_re 4328.259433226389

{"global_step": 300000, "eval_re": [3509.868551901831, 4774.91042469079, 
3870.1368881217422, 5003.865286679302, 4346.8550817665355, 3935.6783125378984, 
4107.26429892778, 4924.933311222982, 4667.150310154168, 4141.931866260865], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309998/1000000 [2:45:22<4:59:24, 38.41it/s]global step 310000, trans_decision ep_re 4392.508461478968

{"global_step": 310000, "eval_re": [4154.620549064875, 4991.031061771109, 
4899.538212411241, 4865.103500405176, 4284.401878018775, 3872.09784146278, 
3441.693536676463, 4830.464877823252, 4596.819705090109, 3989.313452065891], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319998/1000000 [2:51:03<5:01:38, 37.57it/s]global step 320000, trans_decision ep_re 3971.7222767550447

{"global_step": 320000, "eval_re": [4622.638923798001, 3134.0314407445403, 
4098.105249031905, 3611.6519109446963, 4791.447494394732, 3854.554892301467, 
3386.575488981151, 4069.4521905404067, 3911.7301004998267, 4237.035076313718], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329999/1000000 [2:56:53<4:58:52, 37.36it/s]global step 330000, trans_decision ep_re 4170.0332895519805

{"global_step": 330000, "eval_re": [3956.40483131544, 4253.482525177376, 
3793.870090215803, 3430.583870834708, 4507.361086296636, 4441.40308070951, 
3821.7882587644663, 4642.997680856166, 3782.60789093525, 5069.833580414447], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339998/1000000 [3:02:13<3:50:31, 47.72it/s]global step 340000, trans_decision ep_re 4522.305160813114

{"global_step": 340000, "eval_re": [4094.876908495779, 4466.149310532253, 
4122.472508224996, 4144.465701712245, 4687.3674498126475, 4804.412788137731, 
4960.479272573002, 4789.893094223652, 4724.762925910183, 4428.171648508651], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349998/1000000 [3:07:33<4:52:03, 37.09it/s]global step 350000, trans_decision ep_re 4242.981210385736

{"global_step": 350000, "eval_re": [4506.650565964231, 3720.0817901755645, 
4521.499553597652, 4713.432647494684, 3841.992099057114, 4340.254102245294, 
4547.327066817371, 3213.624419334142, 4456.784787975087, 4568.165071196222], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359999/1000000 [3:12:23<3:30:09, 50.76it/s]global step 360000, trans_decision ep_re 3918.592163956834

{"global_step": 360000, "eval_re": [3159.1537409257094, 4354.783565243665, 
3980.6683778076003, 3799.74445422169, 3921.5005296999384, 3957.409817609908, 
4159.841299662227, 3793.830687934401, 4093.988514877301, 3965.000651585897], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369999/1000000 [3:17:13<3:28:04, 50.46it/s]global step 370000, trans_decision ep_re 4477.450043824208

{"global_step": 370000, "eval_re": [4559.098750361322, 5034.01008931816, 
5076.517007870785, 4048.111936083973, 3453.08531079819, 5214.198704465791, 
4570.346822725935, 3556.997265440932, 4191.928359532667, 5070.206191644318], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379995/1000000 [3:22:04<3:24:59, 50.41it/s]global step 380000, trans_decision ep_re 3947.1286495427375

{"global_step": 380000, "eval_re": [4336.264327665577, 3969.9936325958415, 
3720.9268671417835, 3444.541963373231, 4746.9671371423165, 3936.106117777798, 
2842.093539322668, 3614.272368151137, 4487.649231572457, 4372.471310684569], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389995/1000000 [3:26:44<3:23:04, 50.06it/s]global step 390000, trans_decision ep_re 4195.826094246579

{"global_step": 390000, "eval_re": [4900.764611690955, 4014.938196203381, 
3576.3159359130527, 4124.591956490907, 4249.310862404209, 3877.941461024786, 
4332.716201297667, 3972.5018217666625, 4164.0937102353355, 4745.086185438829], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399995/1000000 [3:31:34<3:17:58, 50.51it/s]global step 400000, trans_decision ep_re 4409.655982452317

{"global_step": 400000, "eval_re": [5319.382697536515, 4985.518671717286, 
3597.4618795042516, 2907.634828284624, 5188.271935723733, 4089.793197202667, 
4572.728838440154, 3896.019242564767, 4612.889574535031, 4926.858959014144], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409995/1000000 [3:36:24<3:21:18, 48.85it/s]global step 410000, trans_decision ep_re 4570.482126957972

{"global_step": 410000, "eval_re": [4714.0968184286, 4553.125807877738, 
3667.3590867104135, 4747.317283718223, 4915.2733474051165, 4438.600994191157, 
4951.161418928909, 4207.427101497811, 4757.174224409126, 4753.28518641262], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419999/1000000 [3:41:04<3:11:42, 50.42it/s]global step 420000, trans_decision ep_re 4947.950035201283

{"global_step": 420000, "eval_re": [3932.4322758709127, 5276.0815244495625, 
4849.775368423124, 4792.854118828948, 5163.248306961141, 5096.813862051672, 
5186.378517154618, 5118.1109632934085, 5004.750053142564, 5059.0553618368795], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429995/1000000 [3:45:54<3:07:47, 50.59it/s]global step 430000, trans_decision ep_re 4566.569484262239

{"global_step": 430000, "eval_re": [5332.885170873474, 4914.206971112673, 
4970.203010190739, 4525.51295565248, 5015.051958072083, 5022.878972741764, 
5209.794874467966, 3270.6529077913574, 3230.900665182484, 4173.607356537365], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439995/1000000 [3:50:34<3:05:27, 50.32it/s]global step 440000, trans_decision ep_re 4611.4604905186025

{"global_step": 440000, "eval_re": [4948.254555238503, 4635.989930537346, 
4470.604742839889, 3824.512674690876, 4681.617310542642, 3781.538846730772, 
5140.612825702241, 5344.91340144139, 4696.705466306076, 4589.855151156291], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449995/1000000 [3:55:24<3:02:24, 50.26it/s]global step 450000, trans_decision ep_re 4507.667331321279

{"global_step": 450000, "eval_re": [4911.1821477844915, 3872.111039903533, 
4233.804773932891, 4946.701506483326, 4834.777122450725, 3883.5245695719823, 
4750.696485217314, 4728.128388085741, 3980.299305938507, 4935.447973844274], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459997/1000000 [4:00:14<2:57:42, 50.64it/s]global step 460000, trans_decision ep_re 4552.65391904844

{"global_step": 460000, "eval_re": [4216.803810596735, 4553.410134123429, 
4875.69224945424, 4486.168763520904, 3929.957678739521, 4398.696073495889, 
5135.1760939942305, 4688.195056445951, 4658.122642636054, 4584.316687477444], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469999/1000000 [4:04:54<2:54:26, 50.64it/s]global step 470000, trans_decision ep_re 4539.827847700065

{"global_step": 470000, "eval_re": [2368.264932724494, 4542.28218817718, 
4589.575756289027, 4892.055756691841, 4772.41582613455, 5120.015836993152, 
4487.569657020827, 5452.804729435594, 4297.777700639125, 4875.5160928948535], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479999/1000000 [4:09:44<2:52:03, 50.37it/s]global step 480000, trans_decision ep_re 4390.331845774656

{"global_step": 480000, "eval_re": [5020.233013468292, 4497.616108756183, 
4796.339348067265, 4032.38725551214, 4526.899275451664, 4680.132373563814, 
4045.2129295026857, 4648.958503959512, 3275.393211607623, 4380.14643785739], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489997/1000000 [4:14:24<2:49:11, 50.24it/s]global step 490000, trans_decision ep_re 4629.7456310087355

{"global_step": 490000, "eval_re": [4447.086414039218, 5358.821571989372, 
5090.9216023317385, 4600.565071999375, 4592.689806960151, 2999.325400860685, 
4613.623922383345, 4953.3172570633105, 5266.822248088312, 4374.283014371853], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499999/1000000 [4:19:14<2:45:22, 50.39it/s]global step 500000, trans_decision ep_re 4475.094180220391

{"global_step": 500000, "eval_re": [4835.011040616657, 4626.9071422154375, 
4109.184307702677, 4949.794200034697, 5231.712722697043, 4258.615354575277, 
5087.972793100356, 4457.956854732502, 3084.382461395506, 4109.404925133751], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509995/1000000 [4:24:04<2:41:31, 50.56it/s]global step 510000, trans_decision ep_re 4556.018386241352

{"global_step": 510000, "eval_re": [4096.727924678773, 4486.477909102374, 
4638.88625198581, 4579.672880944353, 5026.83499901349, 4594.701521420791, 
3974.9993726287926, 4519.729266742501, 4790.217307377213, 4851.936428519431], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519997/1000000 [4:28:44<2:38:59, 50.32it/s]global step 520000, trans_decision ep_re 4153.847458647523

{"global_step": 520000, "eval_re": [4092.7139391473584, 3900.1675272622883, 
3531.5982293296165, 4579.203813242923, 4534.310333718311, 4526.436012540813, 
3712.566617285513, 3992.8471736150427, 4032.2284531678733, 4636.402487165495], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529995/1000000 [4:33:34<4:00:27, 32.58it/s]global step 530000, trans_decision ep_re 4426.861268492847

{"global_step": 530000, "eval_re": [4186.1703164510745, 4742.1886856361125, 
4811.348624828152, 4313.464322602562, 3969.1607504599283, 4314.457677308361, 
4920.018474154889, 4924.426142538093, 3136.0805236442984, 4951.297167305004], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539999/1000000 [4:38:24<2:37:41, 48.62it/s]global step 540000, trans_decision ep_re 4456.49062521205

{"global_step": 540000, "eval_re": [4199.050805736013, 5426.327395458424, 
5152.137875073465, 3917.847219086677, 3845.735266614734, 4937.780035173136, 
4319.867411426782, 3244.6654083415633, 4850.990697247631, 4670.504137962072], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549999/1000000 [4:43:04<2:29:20, 50.22it/s]global step 550000, trans_decision ep_re 4510.266677360187

{"global_step": 550000, "eval_re": [5205.262885838437, 4463.056618147027, 
5084.082496422951, 4969.459011730253, 5453.70077670281, 3364.667153649098, 
4010.918028718655, 4453.037218069361, 4228.949600965932, 3869.532983357339], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559999/1000000 [4:47:54<2:24:26, 50.77it/s]global step 560000, trans_decision ep_re 4483.943365452375

{"global_step": 560000, "eval_re": [4303.009036512093, 4307.111766779462, 
3980.882782818672, 4788.722078698819, 4775.165778198639, 4625.916592083824, 
4123.712474504529, 4540.6787103347715, 4438.595706045459, 4955.638728547467], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569995/1000000 [4:52:34<3:03:47, 38.99it/s]global step 570000, trans_decision ep_re 4404.048804002472

{"global_step": 570000, "eval_re": [3961.235888246862, 4748.717316869118, 
4703.958484303253, 4331.364971452361, 4828.0127554006585, 3683.3989528564703, 
4101.073824549488, 4108.536342214091, 4597.490130387293, 4976.699373745123], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579999/1000000 [4:57:14<2:18:15, 50.63it/s]global step 580000, trans_decision ep_re 4608.163302099411

{"global_step": 580000, "eval_re": [5074.2927608796645, 3907.5981680129453, 
5002.324911646011, 4598.262694743432, 5025.352561535562, 4396.075265310228, 
4949.252371091472, 4781.771400874927, 4989.122828623518, 3357.580058276345], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589995/1000000 [5:02:04<2:14:34, 50.78it/s]global step 590000, trans_decision ep_re 4313.803629910599

{"global_step": 590000, "eval_re": [5290.4738854140205, 4701.558616591704, 
3926.8645714052664, 4349.613078081706, 3670.3167510615935, 4306.70114873642, 
4587.287672839279, 5038.590593477682, 2477.616934185765, 4789.013047312548], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599995/1000000 [5:06:44<2:16:33, 48.82it/s]global step 600000, trans_decision ep_re 4734.726246216893

{"global_step": 600000, "eval_re": [4906.701304374223, 4683.541562086181, 
4893.3463368457315, 3631.700426477693, 4977.5048745858885, 5146.189753119533, 
4573.790963935423, 3910.621846933393, 5208.840603380871, 5415.02479042999], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609995/1000000 [5:11:34<2:08:18, 50.66it/s]global step 610000, trans_decision ep_re 4400.610871406695

{"global_step": 610000, "eval_re": [4227.251792255204, 4625.47111520537, 
4145.703309274908, 3977.3014809858078, 4836.420631316476, 4603.160213152732, 
3240.130324086614, 5042.71603650471, 4451.933432474353, 4856.0203788107665], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619999/1000000 [5:16:14<2:08:19, 49.35it/s]global step 620000, trans_decision ep_re 4537.758449611125

{"global_step": 620000, "eval_re": [3888.9342979259454, 4855.107851147941, 
4781.4440038744115, 4663.199385308589, 3478.9174342011815, 4472.495840575886, 
4820.237771861675, 4961.979266070454, 4559.503905653376, 4895.76473949178], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629999/1000000 [5:20:54<2:01:23, 50.80it/s]global step 630000, trans_decision ep_re 4283.485999296159

{"global_step": 630000, "eval_re": [821.7188935091203, 5141.392334052687, 
4474.028407271569, 4775.956186376513, 5184.1885583614785, 4708.202501246559, 
4457.252500232856, 5299.307850693934, 4207.604520957252, 3765.2082402596293], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639995/1000000 [5:25:44<1:57:55, 50.88it/s]global step 640000, trans_decision ep_re 4624.378180327358

{"global_step": 640000, "eval_re": [4240.196941617292, 5346.243241310093, 
4219.0977763026785, 4713.456253060855, 5197.2337241254045, 5359.458855281208, 
4901.7472739710665, 3701.3002407459417, 3734.7513759086137, 4830.296120950426], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649999/1000000 [5:30:24<1:54:43, 50.85it/s]global step 650000, trans_decision ep_re 4849.152451575681

{"global_step": 650000, "eval_re": [5356.284710855995, 4900.460164133605, 
5331.629475104818, 5194.840408527861, 4054.7279496687556, 4921.48741986012, 
5178.219364258254, 5136.189859839554, 3780.5696008528607, 4637.115562654987], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659995/1000000 [5:35:04<1:51:51, 50.66it/s]global step 660000, trans_decision ep_re 4485.416015628433

{"global_step": 660000, "eval_re": [3961.4589032032427, 3802.0291218349034, 
3708.3418146670588, 4263.753848212058, 5340.6686494035175, 5039.637868316996, 
4883.97583397176, 4175.260288225164, 4875.290253354946, 4803.74357509468], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669995/1000000 [5:39:54<1:48:36, 50.64it/s]global step 670000, trans_decision ep_re 4390.565220180752

{"global_step": 670000, "eval_re": [4267.888002039373, 5020.433614662677, 
755.6092402629013, 4875.746616941608, 4475.686797913903, 5708.15241044486, 
4645.3706511176315, 4339.683740299868, 5279.269664855138, 4537.811463269559], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679995/1000000 [5:44:34<1:47:55, 49.42it/s]global step 680000, trans_decision ep_re 4505.08902009976

{"global_step": 680000, "eval_re": [3728.749319281136, 4137.9897747850855, 
4918.109605765565, 4181.622444530074, 4994.102072592798, 5210.073068651999, 
4419.40181965411, 4901.842276892616, 4372.453663508406, 4186.546155335801], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689999/1000000 [5:49:14<1:41:32, 50.89it/s]global step 690000, trans_decision ep_re 4684.09144715644

{"global_step": 690000, "eval_re": [5123.056341460051, 4922.629133891886, 
4695.583881416495, 4713.148726893836, 4093.6322775770536, 4571.609832403876, 
4301.4275369272245, 4535.499921384277, 5166.36902133712, 4717.957798272575], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699995/1000000 [5:54:04<1:38:38, 50.69it/s]global step 700000, trans_decision ep_re 4684.531100605132

{"global_step": 700000, "eval_re": [4351.404354471885, 4587.415138830418, 
5000.4275862659515, 4522.244092530059, 4795.3903794295, 5139.037986779527, 
4411.175562700537, 4408.316640422292, 5190.956922882122, 4438.942341739031], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709995/1000000 [5:58:44<1:35:10, 50.78it/s]global step 710000, trans_decision ep_re 3939.8891994731484

{"global_step": 710000, "eval_re": [4335.842377365727, 3949.2341204144177, 
3418.591790845188, 3428.8432780790035, 4091.470689321006, 4389.220172255404, 
3966.436228283055, 4522.75695449306, 3568.211379270891, 3728.2850044037323], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719995/1000000 [6:03:24<1:32:17, 50.57it/s]global step 720000, trans_decision ep_re 4229.392924575797

{"global_step": 720000, "eval_re": [4361.501763802671, 4447.992459648458, 
3881.276582440214, 4851.948806651537, 4222.444389494549, 4839.7006948960925, 
4183.392836954189, 4718.4722575482065, 3659.3726978643504, 3127.826756457696], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729995/1000000 [6:08:04<1:29:07, 50.49it/s]global step 730000, trans_decision ep_re 4680.449715318697

{"global_step": 730000, "eval_re": [4590.480371907341, 4677.327759474513, 
4157.158744178867, 5074.72271891566, 4450.55626659056, 4976.275445790307, 
4433.825759429578, 4679.737331894251, 5105.435652157606, 4658.977102848288], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739995/1000000 [6:12:54<1:25:33, 50.65it/s]global step 740000, trans_decision ep_re 4885.733297436456

{"global_step": 740000, "eval_re": [5503.164511208935, 4774.18435032521, 
5138.304376562173, 4579.351253018864, 4607.916553462573, 4722.260972584534, 
5209.340970316427, 5020.501595506522, 5049.584474301786, 4252.723917077534], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749995/1000000 [6:17:34<1:22:09, 50.71it/s]global step 750000, trans_decision ep_re 4277.133247808845

{"global_step": 750000, "eval_re": [4478.703770634712, 3299.6256792338054, 
4264.515628313669, 4440.529659653737, 4798.596845613554, 4822.763085420295, 
4773.121992115095, 3933.79433894424, 3947.653021360754, 4012.028456798583], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759995/1000000 [6:22:24<1:18:29, 50.97it/s]global step 760000, trans_decision ep_re 4454.6942073623995

{"global_step": 760000, "eval_re": [5188.232773204659, 4242.263968326204, 
2823.5589335437303, 3430.0908646325033, 5028.010361883144, 5540.2102786541445, 
4608.486202446891, 4597.955294973106, 3653.866084427182, 5434.267311532436], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769999/1000000 [6:27:04<1:15:32, 50.75it/s]global step 770000, trans_decision ep_re 4293.161419152198

{"global_step": 770000, "eval_re": [2727.3696919988533, 5590.061127051909, 
5001.235523026796, 1730.9631893664302, 4814.644906300642, 4519.271536451568, 
4905.212787650859, 5178.299846420757, 3606.044072750173, 4858.511510503988], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779995/1000000 [6:31:44<1:12:05, 50.86it/s]global step 780000, trans_decision ep_re 4669.661082210498

{"global_step": 780000, "eval_re": [3963.104349346254, 3533.3009268200153, 
4943.154581221332, 4879.801371990208, 4748.6237770376465, 4840.866141852967, 
4380.948539596772, 4827.479180341148, 5501.643549431624, 5077.688404467016], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789995/1000000 [6:36:34<1:08:51, 50.83it/s]global step 790000, trans_decision ep_re 4403.066474073092

{"global_step": 790000, "eval_re": [4274.257606545093, 4831.001481377496, 
4115.028300564566, 4601.218080251632, 4904.613924993634, 3998.342030815074, 
4496.647124632479, 3579.710953015353, 4664.706960772546, 4565.138277763053], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799995/1000000 [6:41:14<1:05:42, 50.73it/s]global step 800000, trans_decision ep_re 4433.542160511679

{"global_step": 800000, "eval_re": [5004.541299122351, 4789.983809375853, 
4042.448966529332, 4675.732200243004, 4928.789695821293, 4566.237819398346, 
3790.8892853147477, 4989.244901305661, 4212.19323652234, 3335.3603914838636], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809995/1000000 [6:45:54<1:02:32, 50.63it/s]global step 810000, trans_decision ep_re 4707.964547644883

{"global_step": 810000, "eval_re": [3917.915518407101, 4931.001678284831, 
5169.28612521724, 4712.189487613139, 4897.382703334051, 5383.98925154738, 
5203.781028847825, 5120.928061312877, 3290.5115858432746, 4452.660036041114], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819999/1000000 [6:50:44<59:20, 50.55it/s]global step 820000, trans_decision ep_re 4922.426854652194

{"global_step": 820000, "eval_re": [5080.302381636376, 4461.530738884847, 
4420.110594784445, 5410.328068662748, 4655.989418008193, 5258.148736600268, 
4965.653188173271, 5337.925921398772, 4721.04037298639, 4913.239125386636], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829995/1000000 [6:55:25<55:39, 50.91it/s]global step 830000, trans_decision ep_re 4784.040583608939

{"global_step": 830000, "eval_re": [4574.017078378345, 5497.333008098102, 
4958.427838146275, 5088.512125239471, 4304.735651443719, 5177.07585180098, 
5451.124980328158, 5253.484929951086, 3072.6489664981195, 4463.045406205135], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839995/1000000 [7:00:15<52:32, 50.76it/s]global step 840000, trans_decision ep_re 4446.230907282541

{"global_step": 840000, "eval_re": [4566.2034689296415, 4036.3382493343192, 
4052.0719415140584, 3896.505084520166, 5202.864583237102, 4625.327174046494, 
4308.63578133589, 3579.6493182210306, 5247.266907026282, 4947.446564660431], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849995/1000000 [7:04:55<49:00, 51.01it/s]global step 850000, trans_decision ep_re 4318.667988023601

{"global_step": 850000, "eval_re": [4940.104231569402, 5052.833394866647, 
4537.341653908773, 3813.0288494247375, 3584.7765084762555, 3894.106517776943, 
4492.670720963027, 4607.511036046467, 4448.977069241657, 3815.3298979620968], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859999/1000000 [7:09:45<45:47, 50.96it/s]global step 860000, trans_decision ep_re 4514.874904380942

{"global_step": 860000, "eval_re": [4534.188297009398, 4652.140540146954, 
4739.472160652461, 4838.8799491472355, 4897.219896783636, 4088.9436214389084, 
4257.316789742331, 4729.781097071323, 3413.310011516433, 4997.496680300746], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869999/1000000 [7:14:25<42:26, 51.05it/s]global step 870000, trans_decision ep_re 4927.688211580669

{"global_step": 870000, "eval_re": [4837.4778490402805, 5187.251596732228, 
4911.261179196673, 5037.060979797698, 4826.209335211092, 3992.4282005852265, 
5149.610523719329, 5094.152554452734, 5377.313110625548, 4864.116786445889], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879999/1000000 [7:19:05<39:14, 50.96it/s]global step 880000, trans_decision ep_re 4863.823124671565

{"global_step": 880000, "eval_re": [5574.892423160496, 4851.768191056725, 
5835.519995347553, 4588.109488771521, 4210.147369368867, 4123.315402566443, 
5647.988445131652, 4176.187447931253, 5007.623771434909, 4622.678711946229], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889995/1000000 [7:23:45<35:57, 50.98it/s]global step 890000, trans_decision ep_re 4975.455508143796

{"global_step": 890000, "eval_re": [5013.632219247081, 5081.139245179829, 
5740.447550772721, 5652.783042947959, 4740.704341421073, 4320.634749363854, 
4478.29445184432, 5190.779522743803, 4615.160191889428, 4920.979766027895], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [7:28:35<32:48, 50.80it/s]global step 900000, trans_decision ep_re 4087.7250127572493

{"global_step": 900000, "eval_re": [4902.005434685864, 686.5671246288551, 
4803.199208405611, 4989.176149541758, 5117.1530428738815, 5086.9652434479, 
4117.642066800428, 1293.3196755093816, 4746.33167766321, 5134.890504015601], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909995/1000000 [7:33:15<54:29, 27.53it/s]global step 910000, trans_decision ep_re 4569.023021815165

{"global_step": 910000, "eval_re": [4927.4940248096955, 4358.967037614367, 
4815.693465568274, 2955.152572601781, 4971.197221142111, 4626.803186792958, 
4903.589034589634, 5197.115364631387, 4846.045328129205, 4088.172982272234], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919997/1000000 [7:38:05<26:29, 50.32it/s]global step 920000, trans_decision ep_re 4453.63197587713

{"global_step": 920000, "eval_re": [4168.600941961204, 5139.301667091995, 
2352.7436723168084, 4723.632124108664, 5225.875679986906, 5163.540731619619, 
5061.712179407358, 4647.976816230409, 3884.0582789023997, 4168.877667145927], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929995/1000000 [7:42:55<23:09, 50.37it/s]global step 930000, trans_decision ep_re 4648.817335830387

{"global_step": 930000, "eval_re": [4154.595215705548, 3552.328522731943, 
4217.102933412278, 5132.174935628498, 5236.957485723321, 3496.765504668057, 
4964.949400842419, 5868.03082364666, 4002.7942444375612, 5862.474291507582], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939999/1000000 [7:47:35<19:52, 50.33it/s]global step 940000, trans_decision ep_re 5040.367433774731

{"global_step": 940000, "eval_re": [5091.77393140312, 5307.166844685734, 
5095.456223815399, 4762.658815477449, 4934.803941348629, 5477.693264936483, 
5467.95409760995, 5162.467311744824, 4493.215901310869, 4610.484005414841], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949995/1000000 [7:52:25<16:31, 50.44it/s]global step 950000, trans_decision ep_re 4635.0821024926445

{"global_step": 950000, "eval_re": [5804.167784902718, 4571.742196608291, 
5319.585680130741, 4723.941144048196, 5459.759896208151, 3709.574805481619, 
5873.399271525102, 4907.842873372394, 5052.938156318273, 927.8692163309513], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959995/1000000 [7:57:05<13:10, 50.61it/s]global step 960000, trans_decision ep_re 4779.307248696778

{"global_step": 960000, "eval_re": [3723.596433482765, 4728.146145481533, 
5186.4588231903435, 5272.089317236231, 4710.941508805161, 5350.238472183076, 
3854.273025403655, 5668.930249951114, 5797.821559815791, 3500.576951418111], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969999/1000000 [8:01:55<09:54, 50.46it/s]global step 970000, trans_decision ep_re 4636.835338223161

{"global_step": 970000, "eval_re": [5225.669424895068, 4979.288754480715, 
5307.226427748189, 4518.807202253172, 5092.578552134239, 163.5808047373791, 
5388.125586922013, 5288.514258647425, 5372.249611961439, 5032.312758451968], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979995/1000000 [8:06:35<06:34, 50.65it/s]global step 980000, trans_decision ep_re 4802.400120317987

{"global_step": 980000, "eval_re": [3710.251112826955, 5123.445931781048, 
5193.846589034379, 4874.42978455008, 4191.1618340404975, 5812.507570125761, 
4692.492200190945, 3978.499737784817, 5592.202905727508, 4855.163537117876], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989999/1000000 [8:11:25<03:38, 45.81it/s]global step 990000, trans_decision ep_re 4414.97657901222

{"global_step": 990000, "eval_re": [4686.2572193821625, 3259.6109132476727, 
5819.919996079207, 4968.557364495843, 5463.986298540298, 4841.901405192424, 
4600.847095706368, 1994.2716629329614, 4121.720826551972, 4392.693007993289], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999995/1000000 [8:16:15<00:00, 50.62it/s]global step 1000000, trans_decision ep_re 5424.964924875259

{"global_step": 1000000, "eval_re": [5306.393395444215, 5528.997883751408, 
4908.248073961029, 5420.393656332878, 5306.708568659415, 5394.859258608676, 
5923.366556129633, 4992.0320828499225, 5697.622031368035, 5771.027741647379], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [8:16:28<00:00, 33.57it/s]
