
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.05
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [04:30<10:32:57, 26.07it/s]global step 10000, trans_decision ep_re 369.0037196395894

{"global_step": 10000, "eval_re": [1162.951641992031, 913.2286418212514, 
183.6211895109756, -135.55503018772646, -86.0744254219208, 422.7689167027113, 
313.6496653712259, 272.1235226074821, 726.5451622391415, -83.22208823927704], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19998/1000000 [13:30<10:17:34, 26.45it/s]global step 20000, trans_decision ep_re 215.2887990064823

{"global_step": 20000, "eval_re": [337.8049142290121, 510.93377596268357, 
267.16283863406534, 266.2604068878169, 396.54307398576117, 273.17210214608383, 
-239.6338872404687, -288.54631219381645, 307.8720545986095, 321.31902305507583],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29997/1000000 [22:20<10:19:34, 26.09it/s]global step 30000, trans_decision ep_re 1038.160300257804

{"global_step": 30000, "eval_re": [914.8079355365023, 947.8465397799204, 
1028.3045489701228, 926.8390830929327, 971.7590668859964, 1484.8236291169717, 
1093.2590597593016, 1004.4258616352595, 1008.2203211261163, 1001.3169566749159],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39998/1000000 [31:20<10:05:21, 26.43it/s]global step 40000, trans_decision ep_re 1132.5897487790733

{"global_step": 40000, "eval_re": [1099.350035524302, 1310.9377418723655, 
1205.054652572731, 1093.613197882994, 1000.2423706734893, 1339.0478370588, 
1116.4437974729688, 966.041390007546, 989.738784083444, 1205.4276806420926], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49997/1000000 [40:20<10:08:27, 26.02it/s]global step 50000, trans_decision ep_re 1361.194130740731

{"global_step": 50000, "eval_re": [1080.9687801211116, 1144.3559810407576, 
1486.874633768918, 1980.4541367596983, 1207.3872634223533, 1129.9769684398332, 
2028.863759498782, 1062.3874511310314, 1076.528933266559, 1414.1433999582641], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59999/1000000 [49:20<10:00:43, 26.08it/s]global step 60000, trans_decision ep_re 1184.807893298671

{"global_step": 60000, "eval_re": [1179.9927976976885, 1162.1893504133636, 
1154.8495883738913, 1344.563128006368, 1107.2436195788448, 1193.233268899958, 
1209.8783282500754, 1219.4860334307118, 1111.2556693057968, 1165.38714903001], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69998/1000000 [58:20<9:48:47, 26.33it/s]global step 70000, trans_decision ep_re 1498.2551534952495

{"global_step": 70000, "eval_re": [1164.6580299818402, 1674.604672350215, 
1409.905749274085, 1228.2601629442881, 2273.8985852024352, 1151.1391950327718, 
1221.121366392875, 2118.7462949899295, 1535.8716113103646, 1204.3458674736912], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79997/1000000 [1:07:20<9:50:44, 25.96it/s]global step 80000, trans_decision ep_re 1956.8086599972714

{"global_step": 80000, "eval_re": [1880.2651189865987, 1945.0136212575721, 
1650.337822859446, 1273.0857471440363, 1507.1706459767877, 1476.6506774412003, 
2332.9299045781336, 3060.8180625272025, 2157.6490897671965, 2284.1659094345373],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89997/1000000 [1:16:20<9:42:46, 26.03it/s]global step 90000, trans_decision ep_re 2167.8502933759073

{"global_step": 90000, "eval_re": [1229.5234633196128, 1717.057161045965, 
1696.7037340741012, 3209.536764205837, 1283.0645109922261, 2133.8867478811953, 
2835.2747673313675, 2810.341063681374, 1966.7422790479259, 2796.3724421794686], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99998/1000000 [1:25:20<9:30:46, 26.28it/s]global step 100000, trans_decision ep_re 1942.8090426707563

{"global_step": 100000, "eval_re": [2588.7748005314743, 1167.8857586906176, 
2305.052327577674, 2106.179838826311, 1151.164835869783, 1364.2989218385826, 
1868.1358286276777, 1813.837609186701, 1348.135829806766, 3714.6246757519766], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109999/1000000 [1:34:20<9:29:33, 26.04it/s]global step 110000, trans_decision ep_re 2245.3377666090646

{"global_step": 110000, "eval_re": [1382.5059361974434, 3755.3564219515574, 
1188.042478694987, 2084.296691754045, 1412.1100278417866, 2957.4043974239485, 
1433.153361766277, 1228.234534465934, 3004.4828481294558, 4007.7909678652095], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119999/1000000 [1:43:20<9:24:05, 26.00it/s]global step 120000, trans_decision ep_re 1542.3499818627156

{"global_step": 120000, "eval_re": [1403.0340936900438, 1422.3910689735583, 
2539.1980072180318, 1253.3115298213659, 1472.0781577670587, 1210.268229112232, 
1427.4790138180151, 1500.9016939324722, 1637.4007139651956, 1557.4373103291841],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129999/1000000 [1:52:10<9:15:33, 26.10it/s]global step 130000, trans_decision ep_re 3400.7833848295086

{"global_step": 130000, "eval_re": [2687.024786434744, 3505.4537733731026, 
3228.629634648127, 3390.5025909461483, 3532.3298387943632, 3454.4665000358254, 
3623.131835728344, 3171.6802293374762, 3784.97680377218, 3629.637855224774], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139999/1000000 [2:01:10<9:13:40, 25.89it/s]global step 140000, trans_decision ep_re 3034.285609817645

{"global_step": 140000, "eval_re": [3178.9343560685334, 2689.9038397531854, 
2573.66548745398, 2458.0041647999997, 1602.413419654919, 4292.49198660033, 
2109.7852023671294, 3297.859293161868, 4079.1300632556986, 4060.6682850608054], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149999/1000000 [2:10:20<9:04:47, 26.00it/s]global step 150000, trans_decision ep_re 1881.0034183004857

{"global_step": 150000, "eval_re": [1860.823474664205, 1838.445768432906, 
1589.2778493503097, 1558.6718053317088, 2142.4512677743437, 1642.6390237631795, 
1292.4797148472041, 3023.063770726538, 2072.649120523451, 1789.5323875910096], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159998/1000000 [2:19:20<8:52:43, 26.28it/s]global step 160000, trans_decision ep_re 2054.7868002154064

{"global_step": 160000, "eval_re": [1999.6133673420331, 2667.7749021822856, 
1690.6832966476497, 1943.127988741635, 1870.9832704374442, 1771.4144054843146, 
1777.5499934625252, 1896.099657688081, 1896.5148002988994, 3034.1063198691936], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169997/1000000 [2:28:20<8:52:03, 26.00it/s]global step 170000, trans_decision ep_re 2233.3056541359174

{"global_step": 170000, "eval_re": [1726.341227004528, 1557.740383201886, 
1887.572730479772, 2332.5426137672844, 1491.7588797678445, 1352.585684021564, 
2148.05371380988, 4032.4467887279397, 1430.536097284702, 4373.478423293779], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179999/1000000 [2:37:10<8:42:39, 26.15it/s]global step 180000, trans_decision ep_re 2012.7410954288848

{"global_step": 180000, "eval_re": [1879.7857781495293, 3009.888204293246, 
2820.7507996396507, 1290.4130565359146, 1350.4369447142906, 2917.5777837627575, 
1509.7602555500048, 1411.1543747636122, 2293.941218842255, 1643.702538037588], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189998/1000000 [2:46:10<8:31:57, 26.37it/s]global step 190000, trans_decision ep_re 2316.3790259300617

{"global_step": 190000, "eval_re": [3288.5476579524047, 3104.419230209126, 
2094.9650350873535, 1521.1861578601354, 3166.0776695378095, 2116.24645504618, 
3485.8150972492504, 1504.571515200952, 1481.699961362749, 1400.261479794658], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199998/1000000 [2:55:10<8:24:39, 26.42it/s]global step 200000, trans_decision ep_re 2684.1453024887187

{"global_step": 200000, "eval_re": [3392.3043410864234, 1953.845926178797, 
3160.5808093442633, 1324.328912354979, 3004.12069391799, 3528.3293309216547, 
3363.254202075228, 2322.5669312663263, 3192.5519407226525, 1599.5699370188722], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209998/1000000 [3:04:10<8:21:02, 26.28it/s]global step 210000, trans_decision ep_re 2479.7310562544963

{"global_step": 210000, "eval_re": [1915.708165307984, 2031.3507103556424, 
4032.3117924207972, 2360.267520430958, 1379.6905004284815, 1976.2110989149407, 
2201.4894261583677, 3303.4109658137336, 4068.5357963539336, 1528.334586360121], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219999/1000000 [3:13:10<8:29:01, 25.54it/s]global step 220000, trans_decision ep_re 2447.753002265164

{"global_step": 220000, "eval_re": [3128.095368606805, 3238.3825074446854, 
2311.636769996257, 3239.768882284479, 1734.1026359046975, 2632.6116795815633, 
2622.139219848346, 1292.0318896286985, 2672.6283970381073, 1606.1326723180039], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229999/1000000 [3:22:10<8:12:07, 26.08it/s]global step 230000, trans_decision ep_re 2696.9707616577375

{"global_step": 230000, "eval_re": [2898.742790766719, 2661.8876229756465, 
2687.945996658955, 2925.884858800196, 2486.6093670006917, 2946.9460769581597, 
2088.771634955941, 2722.2982155646646, 2808.2762507866164, 2742.3448021097834], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239999/1000000 [3:31:10<8:07:40, 25.97it/s]global step 240000, trans_decision ep_re 2781.2264874959014

{"global_step": 240000, "eval_re": [3251.1351175401487, 3158.952473212771, 
3164.580029887847, 2484.573420174473, 3420.9909344045313, 1235.3972517811312, 
3217.762069459235, 1555.8638466191094, 3090.5137785239253, 3232.4959533558467], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249998/1000000 [3:40:10<7:52:30, 26.45it/s]global step 250000, trans_decision ep_re 2984.521113908891

{"global_step": 250000, "eval_re": [3181.4378939060275, 3079.558614397995, 
3294.1673272635185, 2538.814954708019, 2935.5520247196764, 2880.2358621906596, 
2929.1350539778396, 2989.2789499579503, 2766.839163883546, 3250.1912940836796], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259998/1000000 [3:49:10<7:53:25, 26.05it/s]global step 260000, trans_decision ep_re 2758.4496156936057

{"global_step": 260000, "eval_re": [2752.523807708196, 3116.77072659497, 
2323.1982901544866, 2582.572531610935, 2882.2193201506448, 2617.3325982181127, 
2764.6699119105815, 2695.643483707314, 2921.0796746120636, 2928.485812268752], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269999/1000000 [3:58:10<7:49:53, 25.89it/s]global step 270000, trans_decision ep_re 1694.8357334410005

{"global_step": 270000, "eval_re": [1827.5913321709586, 1563.8915413674576, 
1580.4504337178032, 1284.6969815403716, 2499.8825957511913, 1336.534673716175, 
2106.644481951554, 1521.52705470172, 1557.2052760501904, 1669.9329634425826], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279998/1000000 [4:07:10<7:36:22, 26.29it/s]global step 280000, trans_decision ep_re 2124.804395414777

{"global_step": 280000, "eval_re": [2065.803622069905, 1547.5858351426598, 
2365.146065064114, 3222.999812139702, 1133.3599060803701, 1832.7841075209783, 
3061.0926297711694, 1402.741914661517, 3181.6733204358284, 1434.8567412615264], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289998/1000000 [4:16:10<7:28:49, 26.37it/s]global step 290000, trans_decision ep_re 2284.93363322752

{"global_step": 290000, "eval_re": [2247.4665690460943, 2241.3807350484144, 
1376.6065482973179, 2883.457428717005, 1772.6570570477184, 3108.674356661072, 
1424.9463273038502, 3152.3229374322536, 3295.7166480420747, 1346.1077246794007],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299998/1000000 [4:25:10<7:25:48, 26.17it/s]global step 300000, trans_decision ep_re 1652.6490879197659

{"global_step": 300000, "eval_re": [2390.1134775354385, 2090.6237180015323, 
1331.2955262460343, 1456.9841586682805, 1475.7711690441133, 1458.4809636416187, 
1744.2201615453716, 1408.4051367606269, 1489.0686164654762, 1681.527951289168], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309998/1000000 [4:34:10<7:13:53, 26.50it/s]global step 310000, trans_decision ep_re 1853.0163956711126

{"global_step": 310000, "eval_re": [1240.870827115899, 3062.250821951421, 
1463.9159061556147, 1488.5822608148142, 1816.3760674114053, 1928.4967297697447, 
2438.558548379043, 1592.9517028910298, 2273.601986124552, 1224.5591060976037], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319999/1000000 [4:43:10<7:14:54, 26.06it/s]global step 320000, trans_decision ep_re 2141.3191339650884

{"global_step": 320000, "eval_re": [1453.8611177933133, 1261.816983521995, 
2358.883773282539, 2527.6719141202325, 1385.0045451254011, 2310.787174985515, 
3405.0918317823616, 1299.3251647333057, 3462.0612060754734, 1948.6876282307448],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329999/1000000 [4:52:10<7:11:52, 25.86it/s]global step 330000, trans_decision ep_re 2833.3598819477147

{"global_step": 330000, "eval_re": [2913.9550476719946, 3428.00035974987, 
3329.0117580604, 3456.9883112294165, 1673.4552676286307, 2797.551445244243, 
2950.7150726518034, 3333.2278569986443, 3121.012104795255, 1329.6815954468912], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339998/1000000 [5:01:10<7:00:19, 26.17it/s]global step 340000, trans_decision ep_re 2090.154977785837

{"global_step": 340000, "eval_re": [1295.225533600661, 2417.555695929625, 
1677.2573769656628, 1284.8494354434308, 1604.806375085085, 1690.664441047291, 
1720.025394241593, 2002.5905423928098, 3959.7324867209068, 3248.842496431304], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349999/1000000 [5:10:10<7:00:31, 25.76it/s]global step 350000, trans_decision ep_re 2031.7433865194919

{"global_step": 350000, "eval_re": [1598.1848517483224, 1555.1604368861365, 
1359.1195292049626, 1700.8425292218503, 2888.8056860590636, 2243.462464638892, 
1943.2181818172915, 2778.3060387897312, 2366.0278157184007, 1884.3063311102655],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359999/1000000 [5:19:20<6:51:00, 25.95it/s]global step 360000, trans_decision ep_re 2364.416319427201

{"global_step": 360000, "eval_re": [3194.4233726570665, 2995.1156941552717, 
1635.753047040539, 1514.674207788545, 3453.641266546597, 1306.0174151945266, 
1453.487423304457, 3328.5082481190298, 3321.2472928849447, 1441.2952265810322], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369998/1000000 [5:28:20<6:39:24, 26.29it/s]global step 370000, trans_decision ep_re 2681.042014216596

{"global_step": 370000, "eval_re": [3121.147874260043, 2673.723879802213, 
2540.862873787975, 2732.571908464374, 2779.398299703593, 2502.0465693683886, 
2300.5294396725667, 2663.982973024074, 2712.84122754553, 2783.3150965371997], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379998/1000000 [5:37:20<6:32:14, 26.34it/s]global step 380000, trans_decision ep_re 2561.2728941010632

{"global_step": 380000, "eval_re": [1262.547697996549, 2387.0080170862893, 
3407.5628413871473, 1378.310306485398, 4000.304466628521, 1506.8328028790722, 
3707.0306770524307, 2433.64756265197, 4324.161300097637, 1205.3232687456195], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389997/1000000 [5:46:20<6:33:40, 25.83it/s]global step 390000, trans_decision ep_re 2095.577835936589

{"global_step": 390000, "eval_re": [1101.487775624617, 1755.4460902615228, 
2211.166945968609, 1813.8009898379946, 3652.222072874356, 1133.3228843897643, 
3449.0114617864615, 2307.0436456365437, 2228.0405155155195, 1304.2359774705012],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [5:55:20<6:28:43, 25.72it/s]global step 400000, trans_decision ep_re 2852.8789213367554

{"global_step": 400000, "eval_re": [3074.840090930774, 2864.0146595665055, 
2524.918555305481, 3129.8213648864034, 2357.1226480593696, 3026.0912763497804, 
3113.655404883623, 2974.2379611336432, 2708.426900121417, 2755.6603521305615], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409999/1000000 [6:04:20<6:18:35, 25.97it/s]global step 410000, trans_decision ep_re 2533.3650249525576

{"global_step": 410000, "eval_re": [2713.9629564001402, 1494.014607840453, 
2993.7652168204695, 1690.1229840397486, 2387.377509020412, 3015.5861645801483, 
3043.5590474089204, 2277.74640861426, 2611.007990815242, 3106.5073639857874], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419999/1000000 [6:13:20<6:10:42, 26.08it/s]global step 420000, trans_decision ep_re 2326.947950948605

{"global_step": 420000, "eval_re": [1776.4572491519605, 3813.2337895094392, 
1886.1686425068633, 1701.958557830059, 2894.5294010168036, 1470.8222727504497, 
1779.6450509258923, 2590.057798707986, 2220.38492050628, 3136.2218265803126], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429998/1000000 [6:22:20<6:02:10, 26.23it/s]global step 430000, trans_decision ep_re 2178.8529597545116

{"global_step": 430000, "eval_re": [2078.452553134322, 3477.610179717289, 
1802.6030062836228, 1697.751381607493, 1401.8965846071785, 3056.2223011118417, 
2035.0553238391067, 1842.4222446280191, 1172.3284775389948, 3224.1875450772477],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439999/1000000 [6:31:20<5:59:57, 25.93it/s]global step 440000, trans_decision ep_re 2141.508417607678

{"global_step": 440000, "eval_re": [2341.4811847179326, 2570.461356864733, 
1599.0945209198965, 1574.2889700302549, 2679.681925840188, 1429.9320686877518, 
2982.8952653884394, 1633.4023016791534, 3343.1812657392634, 1260.665316209169], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449998/1000000 [6:40:30<5:53:30, 25.93it/s]global step 450000, trans_decision ep_re 2321.1875599157715

{"global_step": 450000, "eval_re": [2621.1211914929145, 1987.4269741851817, 
3006.820128173256, 2294.9965530311074, 2782.151417746473, 2503.847509134956, 
2029.3294582632323, 1258.963618324015, 2659.193094670238, 2068.025654136341], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459997/1000000 [6:49:30<5:49:07, 25.78it/s]global step 460000, trans_decision ep_re 2241.458831909405

{"global_step": 460000, "eval_re": [2576.6453331963935, 1904.226307700077, 
1540.1206107154214, 2220.159875040363, 2373.441219807234, 1652.7604070761213, 
2428.244444414506, 2664.7687583605757, 2628.355680851418, 2425.865681931943], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469999/1000000 [6:58:30<5:41:46, 25.85it/s]global step 470000, trans_decision ep_re 1931.0866710235982

{"global_step": 470000, "eval_re": [1596.5127110118624, 2455.77005670081, 
1161.6147261086353, 3005.4490157735663, 2151.8782524435514, 1867.8897352181584, 
1783.3679499378313, 2175.1959804437747, 1856.3340322946026, 1256.8542503031897],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479998/1000000 [7:07:40<5:29:12, 26.33it/s]global step 480000, trans_decision ep_re 2085.042292150237

{"global_step": 480000, "eval_re": [1457.5959377077272, 2686.7250890852015, 
2968.600714491456, 1371.2470451739273, 1553.943989810882, 3217.7118292312325, 
1741.757780698047, 1936.5008507516484, 2504.2333813922696, 1412.1063031599724], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489997/1000000 [7:16:40<5:31:42, 25.63it/s]global step 490000, trans_decision ep_re 1779.4234633161545

{"global_step": 490000, "eval_re": [1625.6170665866834, 1423.206248771382, 
2750.754669980285, 1243.0790222099079, 1250.0073722529448, 1728.6369669343053, 
1615.9123108172291, 1792.529165343518, 2302.4352784038024, 2062.0565318614895], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499997/1000000 [7:25:40<5:24:02, 25.72it/s]global step 500000, trans_decision ep_re 2400.7394228022595

{"global_step": 500000, "eval_re": [1702.8163921289781, 1848.8362032038601, 
2035.9277073940907, 3210.758685878257, 2058.780062063742, 3016.3202457679085, 
1806.2164393863802, 2305.130542751122, 3619.2578596082476, 2403.3500898400066], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509999/1000000 [7:34:40<5:12:41, 26.12it/s]global step 510000, trans_decision ep_re 1675.1258018813055

{"global_step": 510000, "eval_re": [1875.8322323184063, 1954.4730207067792, 
1837.6847336486405, 1259.549710167173, 1035.7828530710067, 2593.8868925906804, 
1646.6059555486404, 1859.3188403678603, 1489.8202256338448, 1198.3035547600261],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519998/1000000 [7:43:40<5:02:59, 26.40it/s]global step 520000, trans_decision ep_re 2266.8073945931023

{"global_step": 520000, "eval_re": [1660.9837881070127, 2400.3774757642605, 
2550.2173111361553, 1320.668744344232, 1804.5637857480567, 1983.4707150804102, 
1677.4954751906212, 3181.982867954352, 2995.81840635348, 3092.495376252444], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529998/1000000 [7:52:40<4:55:56, 26.47it/s]global step 530000, trans_decision ep_re 2016.6629036022914

{"global_step": 530000, "eval_re": [2274.1228688654314, 1977.5938466189468, 
2498.6743273078396, 1703.4518085681088, 1430.0709653918477, 1914.4916617166575, 
2654.400224319163, 2508.5861574271335, 1355.8731598129446, 1849.364015994841], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539998/1000000 [8:01:40<4:54:06, 26.07it/s]global step 540000, trans_decision ep_re 1712.1053600985401

{"global_step": 540000, "eval_re": [1675.7668419204088, 2433.1350746059975, 
1864.6129600349407, 2328.620773832909, 1941.499243085753, 2018.4804833303135, 
176.2838555137032, 1617.6479506386174, 1246.5576350374972, 1818.448782985259], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549998/1000000 [8:10:40<4:44:51, 26.33it/s]global step 550000, trans_decision ep_re 1757.5445727291274

{"global_step": 550000, "eval_re": [1428.1675651622268, 2831.0403488269385, 
2046.1770923714864, 1428.3942592173796, 1375.6797449181695, 2060.437244833715, 
2171.467332398767, 1414.272503187502, 1393.368107466413, 1426.4415289086764], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559999/1000000 [8:19:40<4:40:34, 26.14it/s]global step 560000, trans_decision ep_re 1672.9945708882283

{"global_step": 560000, "eval_re": [1915.1006973105643, 1420.6381256520606, 
1344.1578756506278, 1496.3286295471232, 1677.2099449182654, 2230.8223821588526, 
1509.62889166556, 1698.2642251865768, 2209.1264512802572, 1228.668485512393], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569998/1000000 [8:28:30<4:29:44, 26.57it/s]global step 570000, trans_decision ep_re 1674.8406712720637

{"global_step": 570000, "eval_re": [1276.599990993683, 1103.7963881375879, 
2061.403885343654, 1121.803641379797, 2716.111710431091, 1206.709059658521, 
3029.268360412632, 1388.790489158842, 1540.5369404825244, 1303.3862467223062], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579998/1000000 [8:37:30<4:24:47, 26.44it/s]global step 580000, trans_decision ep_re 1986.8787209913942

{"global_step": 580000, "eval_re": [1833.6075930367294, 2553.524988523343, 
3096.252181126549, 2577.5317448564256, 1260.3971880100012, 2458.1175787714515, 
1351.9552571467077, 1925.0565254161359, 1372.3833345092114, 1439.960818517389], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589999/1000000 [8:46:30<4:21:42, 26.11it/s]global step 590000, trans_decision ep_re 1783.722871350299

{"global_step": 590000, "eval_re": [1386.2122281488687, 1113.313409616739, 
1118.7126811021499, 2486.0974747102105, 1813.170152374777, 1477.7552456344931, 
1409.4720594582525, 3075.824386917341, 1379.9754842898724, 2576.6955912502863], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599999/1000000 [8:55:20<4:14:58, 26.15it/s]global step 600000, trans_decision ep_re 2172.542626967907

{"global_step": 600000, "eval_re": [1910.1461018007005, 1789.3187895673063, 
1330.242846489275, 1346.4225651332702, 2711.8231057739126, 1465.82708644967, 
2318.1775114318466, 3009.110270043544, 3087.9018937243613, 2756.456099265181], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609997/1000000 [9:04:20<4:07:36, 26.25it/s]global step 610000, trans_decision ep_re 4167.062471658035

{"global_step": 610000, "eval_re": [3918.7640578487444, 3788.154975213008, 
4332.75326058276, 4230.742747232881, 4333.441787529308, 4330.516533058211, 
4279.034068548718, 4303.650608421319, 3891.7918315669776, 4261.774846578427], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619999/1000000 [9:13:10<4:01:01, 26.28it/s]global step 620000, trans_decision ep_re 1799.169367265617

{"global_step": 620000, "eval_re": [2449.25603424453, 1452.6694058960018, 
1553.3861723210239, 1891.7963272201275, 1919.6643197676283, 2058.9496221324926, 
1815.0589669988287, 1899.025840579642, 1692.7336231842585, 1259.1533603116343], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629998/1000000 [9:22:10<3:54:10, 26.33it/s]global step 630000, trans_decision ep_re 1874.2235717857468

{"global_step": 630000, "eval_re": [1423.3152978832868, 1826.6247321501808, 
1845.0730532375542, 1316.5035022526308, 1577.8302068355601, 2331.0745546455287, 
1776.862627587313, 2708.831328745812, 1420.6127461562949, 2515.507668363305], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639998/1000000 [9:31:10<3:46:43, 26.46it/s]global step 640000, trans_decision ep_re 2129.426121708056

{"global_step": 640000, "eval_re": [1451.3113077152925, 2391.1793052114754, 
2377.503550370102, 1222.775062218682, 2180.6475108462487, 2136.8118867681756, 
3361.133446501853, 2248.2024874597605, 2221.209779145308, 1703.486880843662], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649998/1000000 [9:40:00<3:39:26, 26.58it/s]global step 650000, trans_decision ep_re 2116.0887666179824

{"global_step": 650000, "eval_re": [2165.346918280291, 1212.4928119614308, 
1579.3238104058762, 2566.1370429685335, 1816.6805145768162, 2923.1978942877104, 
1880.1404496583016, 1933.3319187318118, 2632.4446061587114, 2451.7916991503394],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659997/1000000 [9:49:00<3:37:08, 26.10it/s]global step 660000, trans_decision ep_re 2183.772162143403

{"global_step": 660000, "eval_re": [1573.3437634764234, 2595.023585001872, 
2484.931008257317, 1539.97589674546, 2212.7043847040113, 1776.161232810232, 
2535.3816312178446, 2631.1378130198946, 2156.7332107817806, 2332.3290954191934],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669998/1000000 [9:57:50<3:28:36, 26.36it/s]global step 670000, trans_decision ep_re 1700.5021615653327

{"global_step": 670000, "eval_re": [2927.2267156707394, 1337.5264494310802, 
1114.5975722445173, 2401.9284455719603, 1181.9769938072561, 1508.023283608071, 
1405.1857263544637, 1272.7383312659774, 2260.655199529503, 1595.16289816976], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679998/1000000 [10:06:50<3:18:57, 26.81it/s]global step 680000, trans_decision ep_re 1729.769523694149

{"global_step": 680000, "eval_re": [1931.6222286123616, 1198.080863006166, 
1374.503402299598, 2010.1906720646518, 1456.738283277965, 2745.60391237126, 
1524.6668494013009, 1979.110662136095, 1811.2353261442083, 1265.9430376278842], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689998/1000000 [10:15:40<3:14:46, 26.53it/s]global step 690000, trans_decision ep_re 1896.1789109020847

{"global_step": 690000, "eval_re": [1499.9386540403548, 2797.820710276183, 
2140.7993251990556, 1149.7537407274135, 2612.8122113614313, 1578.948864066791, 
2092.610242984777, 1863.2291167237538, 1334.3453942920582, 1891.5308493490286], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699997/1000000 [10:24:40<3:11:06, 26.16it/s]global step 700000, trans_decision ep_re 1569.1093485517786

{"global_step": 700000, "eval_re": [1414.5429292397105, 1457.4717060742025, 
1799.4761113058637, 1544.6497091092085, 1143.7424943234698, 2000.979523936145, 
1227.439174349247, 1379.1661585694676, 1780.338631486744, 1943.287047123728], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709998/1000000 [10:33:30<3:03:01, 26.41it/s]global step 710000, trans_decision ep_re 1993.878065371108

{"global_step": 710000, "eval_re": [1242.712767384143, 1579.7456239969072, 
1321.5309426929625, 2063.7668097428327, 2223.7629104097555, 2751.6623437872277, 
2087.877904137062, 1446.9388046550043, 3581.244730304188, 1639.5378166009973], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719999/1000000 [10:42:30<2:58:26, 26.15it/s]global step 720000, trans_decision ep_re 1485.0502240991195

{"global_step": 720000, "eval_re": [1416.6023512013542, 1437.3142774582514, 
1598.6271968082071, 1267.222978627996, 1216.6138509054006, 1882.8339559310202, 
1382.2107633093171, 1800.4384954870898, 1458.6119582319168, 1390.0264130306425],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729998/1000000 [10:51:20<2:49:40, 26.52it/s]global step 730000, trans_decision ep_re 1778.5582288295197

{"global_step": 730000, "eval_re": [1336.4203976216766, 2200.6590394337586, 
1698.8214199855095, 2350.002153584434, 2238.119258200541, 1466.0980065570525, 
1364.8986665050827, 1592.0093601581423, 1249.5703687151356, 2288.9836175338637],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739997/1000000 [11:00:20<2:45:02, 26.26it/s]global step 740000, trans_decision ep_re 1969.3387125264496

{"global_step": 740000, "eval_re": [2538.8892419181584, 2766.509309642819, 
2176.193742929396, 1316.2562022655472, 1633.7624418491034, 1803.5941402036756, 
2121.270836248557, 2127.6225085400815, 1231.9384336743838, 1977.3502679927774], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749999/1000000 [11:09:10<2:40:26, 25.97it/s]global step 750000, trans_decision ep_re 1694.3085381638382

{"global_step": 750000, "eval_re": [1612.9651997119845, 2632.984480856361, 
1172.5046807771503, 1172.9171835692557, 1611.4694458532676, 2235.5749601993994, 
1799.355980949308, 1259.6726993602426, 1546.859719686927, 1898.781030674485], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759998/1000000 [11:18:10<2:30:46, 26.53it/s]global step 760000, trans_decision ep_re 1695.2121992699172

{"global_step": 760000, "eval_re": [1961.3992697890035, 1783.8453420641076, 
1882.064485834785, 1528.150914266997, 1640.4947564499746, 1344.6323733395095, 
1910.1963178712465, 1548.220513749016, 2214.3755335122487, 1138.7424858222826], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769998/1000000 [11:27:10<2:25:15, 26.39it/s]global step 770000, trans_decision ep_re 1936.0742735519561

{"global_step": 770000, "eval_re": [2998.0334508805713, 1903.5531282415157, 
1492.8116872624037, 1650.2524983220658, 1430.293906142429, 1753.7049777653044, 
2623.9055514316838, 2726.627258655737, 1608.939253668114, 1172.6210231497366], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779999/1000000 [11:36:00<2:21:04, 25.99it/s]global step 780000, trans_decision ep_re 1882.696885868075

{"global_step": 780000, "eval_re": [2078.273041358275, 2371.5768840568726, 
2480.7850596750727, 1242.5807911459012, 1823.0243542795404, 1697.3725638080984, 
1246.981508305643, 1637.456503768502, 3026.2383528015453, 1222.6797994812994], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789998/1000000 [11:45:00<2:13:10, 26.28it/s]global step 790000, trans_decision ep_re 1806.0846687741164

{"global_step": 790000, "eval_re": [1204.9423644095543, 1181.5517034604786, 
2602.4113467598277, 2069.558358686531, 2027.7459112311192, 2032.688847635028, 
2345.257033021962, 1301.9906384767999, 1739.453850528943, 1555.246633530921], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799999/1000000 [11:53:50<2:06:30, 26.35it/s]global step 800000, trans_decision ep_re 1799.2417443845166

{"global_step": 800000, "eval_re": [1900.886557092471, 978.7545837361766, 
1651.3672176989714, 1681.850204683814, 1193.994907629035, 3194.452878553029, 
1264.2579150450654, 2083.56628548989, 2245.630089167085, 1797.6568047496264], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809998/1000000 [12:02:50<1:59:19, 26.54it/s]global step 810000, trans_decision ep_re 1485.2026148656773

{"global_step": 810000, "eval_re": [1278.2007511402803, 1502.8166774971849, 
1489.6867135309637, 1471.4965748010152, 1651.399814522317, 1446.0772274127405, 
1284.8573741556393, 2046.1077824083045, 1344.8779710438625, 1336.5052621444659],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819998/1000000 [12:11:50<1:54:40, 26.16it/s]global step 820000, trans_decision ep_re 1737.9103892942414

{"global_step": 820000, "eval_re": [1886.799750965561, 1461.9618762769664, 
1316.5426586971043, 1235.0422186034946, 2545.2527327809194, 2609.5521905087003, 
2346.400630669249, 1153.0447541045514, 1575.8878355845375, 1248.619244751331], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829999/1000000 [12:20:40<1:48:18, 26.16it/s]global step 830000, trans_decision ep_re 1565.7602928666292

{"global_step": 830000, "eval_re": [1489.2879721393808, 1530.0513089813549, 
1706.530048115768, 1303.9346458834168, 1565.536063668938, 1325.815262615261, 
2053.730032294058, 1247.7984246965066, 1834.6088056899873, 1600.3103645816193], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839998/1000000 [12:29:40<1:40:57, 26.41it/s]global step 840000, trans_decision ep_re 1398.31063158179

{"global_step": 840000, "eval_re": [1255.2195741337332, 1648.988832306057, 
1318.5588071794589, 1219.4503245311464, 1857.9435706285321, 1368.37377832852, 
1243.3526802681542, 1293.3088556981795, 1557.6550299610165, 1220.254862783099], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849998/1000000 [12:38:40<1:34:15, 26.52it/s]global step 850000, trans_decision ep_re 1970.2860318213984

{"global_step": 850000, "eval_re": [2142.832154987967, 2101.6763471311174, 
2566.449685387013, 2671.348838298697, 1573.2919480740327, 1269.0617254729768, 
1320.4056085133975, 2483.4155351314275, 2296.779825456481, 1277.5986497608753], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859999/1000000 [12:47:30<1:29:07, 26.18it/s]global step 860000, trans_decision ep_re 1669.9958899921307

{"global_step": 860000, "eval_re": [2503.9121975087655, 1513.1427138027093, 
1463.6793531154003, 1219.0208661330714, 2066.7635158986336, 1295.958242172097, 
1457.3240918844458, 1773.2025005623, 2019.5835409984215, 1387.371877845464], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869998/1000000 [12:56:30<1:21:55, 26.45it/s]global step 870000, trans_decision ep_re 1609.2566544835313

{"global_step": 870000, "eval_re": [1230.8786062053034, 1254.0264853815554, 
1608.0337929592292, 1567.8516209478682, 2597.148112087952, 1143.1254245664572, 
1518.5527520289154, 1913.5552114889547, 1521.687934632136, 1737.7066045369397], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879999/1000000 [13:05:30<1:17:10, 25.92it/s]global step 880000, trans_decision ep_re 2105.113481755705

{"global_step": 880000, "eval_re": [2325.5110835732567, 2097.6556994455864, 
2479.130187854806, 2151.8786026225844, 1919.090945589234, 1923.8365300719124, 
2304.9860196916134, 2010.3902798716538, 2279.7561776103194, 1558.8992912260844],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889999/1000000 [13:14:30<1:10:12, 26.11it/s]global step 890000, trans_decision ep_re 1573.0067724978983

{"global_step": 890000, "eval_re": [2264.8665350612605, 1902.033911097694, 
1624.8963136331188, 1031.5882848207648, 2046.1654904393367, 1243.380762740158, 
1299.8061262495846, 1416.771647964321, 1643.7736280789466, 1256.7850248937968], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899998/1000000 [13:23:20<1:03:16, 26.34it/s]global step 900000, trans_decision ep_re 1742.8239239229647

{"global_step": 900000, "eval_re": [2156.2337612530805, 2932.8876454319825, 
1239.281638991337, 1249.4753409621337, 1457.5598285494357, 2282.9585905025006, 
1762.6980205669834, 1201.400031208264, 1639.0935608290094, 1506.6508209349192], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909997/1000000 [13:32:20<56:53, 26.37it/s]global step 910000, trans_decision ep_re 1609.4681137135094

{"global_step": 910000, "eval_re": [1564.822176605902, 1585.345407384514, 
1227.983452281032, 1765.0917971815832, 1227.3635779384451, 1659.719813440348, 
1995.718451603737, 2233.107693964351, 1449.208727915175, 1386.320038820007], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919999/1000000 [13:41:10<51:24, 25.93it/s]global step 920000, trans_decision ep_re 2014.8080459512416

{"global_step": 920000, "eval_re": [2054.8627492831615, 1701.8340168785696, 
1516.445597995944, 1679.5106604808475, 2099.845826545925, 2357.075829183283, 
2131.144128333873, 1748.3273882926665, 2315.426873101476, 2543.607389416673], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929998/1000000 [13:50:10<43:45, 26.67it/s]global step 930000, trans_decision ep_re 1619.6921920876687

{"global_step": 930000, "eval_re": [1375.4968940686929, 1576.0857237705948, 
2846.0675157790024, 1229.6386717305666, 1319.611354500979, 1159.1604869806256, 
1204.5331918766744, 1410.0179355410937, 2469.0408430968805, 1607.2693035315772],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939998/1000000 [13:59:10<37:50, 26.43it/s]global step 940000, trans_decision ep_re 1605.4620940034642

{"global_step": 940000, "eval_re": [1274.685194657262, 1837.0861879660595, 
1327.90660747492, 1418.0130213622328, 1665.6077289481632, 1838.275233099813, 
1622.917401471571, 2235.9387603492173, 1551.8752247040084, 1282.315580001395], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949999/1000000 [14:08:00<31:36, 26.36it/s]global step 950000, trans_decision ep_re 1653.492339905967

{"global_step": 950000, "eval_re": [1815.7636601135287, 1338.7532387481076, 
1221.1902498461454, 1566.4112509809552, 1688.3694843571673, 1858.2754515454199, 
1916.5068767376945, 1514.0360673460157, 2093.991853540889, 1521.6252658437475], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959998/1000000 [14:17:00<25:25, 26.22it/s]global step 960000, trans_decision ep_re 1586.2761322864797

{"global_step": 960000, "eval_re": [1612.6916825444657, 2752.8525679459763, 
1508.3985736401683, 1378.7162555997734, 1368.8800480139992, 1398.0021589296978, 
1601.6113361729267, 1606.4738857212558, 1161.444347529337, 1473.690466767198], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969997/1000000 [14:25:50<18:46, 26.63it/s]global step 970000, trans_decision ep_re 1337.2029923806128

{"global_step": 970000, "eval_re": [1553.632687829771, 174.86751068298824, 
1722.090785695428, 1341.8492373812066, 1429.6943667339885, 1542.5874087947007, 
1243.8474135397537, 1254.989528097771, 1859.1656774002893, 1249.3053076502335], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979999/1000000 [14:34:40<12:36, 26.42it/s]global step 980000, trans_decision ep_re 1744.801839556752

{"global_step": 980000, "eval_re": [1883.5151998005686, 1173.4204320921128, 
1948.3202226666335, 2023.807656934045, 3105.7003663137148, 1497.979347489607, 
1661.0520148117994, 1390.9047350863284, 1037.484361491031, 1725.8340588816798], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989998/1000000 [14:43:40<06:12, 26.89it/s]global step 990000, trans_decision ep_re 1508.2119733534773

{"global_step": 990000, "eval_re": [1709.1732178368259, 1883.2504576104047, 
1614.1958609782457, 1240.3035569582917, 1204.0333418568725, 1560.223097652543, 
1676.070573549613, 1245.2257758663995, 1568.3951258603433, 1381.2487253652346], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999999/1000000 [14:52:30<00:00, 26.39it/s]global step 1000000, trans_decision ep_re 1723.0773134294868

{"global_step": 1000000, "eval_re": [1705.7505277335372, 1332.7854107296287, 
1520.4844979992317, 1371.4439923963248, 2580.8855915245176, 1361.2624375896323, 
1758.3581001407692, 1448.5350529345633, 1449.3330368356276, 2701.934486411036], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [14:53:00<00:00, 18.66it/s]
