
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.25
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [05:20<12:40:10, 21.71it/s]global step 10000, trans_decision ep_re 4.813004059858241

{"global_step": 10000, "eval_re": [6.971847056889984, 4.262125843460077, 
6.881872528021405, 1.959889116168802, 4.314517080017863, 1.708071655630836, 
2.4841392978534196, 12.051678467391941, 3.4281983365902895, 4.0677012165577935],
"eval_len": [32, 14, 22, 26, 24, 17, 15, 25, 23, 27]}

  2%|▏         | 19999/1000000 [15:40<12:30:15, 21.77it/s]global step 20000, trans_decision ep_re 4.327526611816028

{"global_step": 20000, "eval_re": [-0.8544997247057339, 5.5573278675175315, 
-2.915341681310805, -3.1492036950748497, 0.1743877592455642, 
0.13479730124131073, 11.50707681807606, -0.07639217593886694, 25.12386099029555,
7.7732526588145205], "eval_len": [10, 28, 13, 16, 17, 13, 45, 19, 56, 25]}

  3%|▎         | 29998/1000000 [25:41<12:15:06, 21.99it/s]global step 30000, trans_decision ep_re 20.107241336010983

{"global_step": 30000, "eval_re": [7.825201446730254, 170.89926041592392, 
-9.165383038864025, -0.05489594909807738, 6.656623017437466, 6.406332155140916, 
-3.0182385478310025, 16.707444637367253, 7.593728307365323, 
-2.7776590840622055], "eval_len": [51, 143, 15, 44, 20, 20, 14, 32, 25, 24]}

  4%|▍         | 39999/1000000 [36:10<12:22:52, 21.54it/s]global step 40000, trans_decision ep_re 5.828899332462821

{"global_step": 40000, "eval_re": [8.920907324891862, 17.399334720220153, 
-2.1588412485589537, -5.203686126635962, 14.373754705868684, 4.57138448452045, 
10.775161005332087, -0.9234395684986947, 7.078654465520378, 3.455763561968195], 
"eval_len": [25, 62, 11, 21, 36, 17, 31, 12, 26, 18]}

  5%|▍         | 49999/1000000 [46:30<12:07:36, 21.76it/s]global step 50000, trans_decision ep_re 4.533355831219405

{"global_step": 50000, "eval_re": [6.6409842485849895, -3.028650912891361, 
5.782126170595914, 12.840016763844945, 5.640853880570504, 8.464480046592962, 
2.249886361570411, 6.290558152211464, -0.11025185844715141, 0.563555459561369], 
"eval_len": [24, 25, 17, 28, 20, 17, 26, 18, 18, 17]}

  6%|▌         | 59998/1000000 [56:31<11:50:27, 22.05it/s]global step 60000, trans_decision ep_re 28.760066771129324

{"global_step": 60000, "eval_re": [10.326736823889963, 2.2355201112561542, 
1.9689623812962933, 3.5392555759461035, 1.1669214811137798, 3.3183240133559417, 
16.88006593782717, 15.295526572230726, 5.88625805577224, 226.98309675860486], 
"eval_len": [25, 18, 36, 15, 27, 17, 25, 32, 29, 241]}

  7%|▋         | 69999/1000000 [1:07:00<11:55:01, 21.68it/s]global step 70000, trans_decision ep_re 3.0778698035780727

{"global_step": 70000, "eval_re": [-5.042199318864474, 2.3197687783551735, 
1.9697034272703307, 1.3939550788230766, 10.150802010281932, 6.727384086283305, 
7.632578996715692, 1.926778674376746, -0.6573383019126585, 4.357264604451604], 
"eval_len": [276, 19, 13, 17, 36, 19, 20, 13, 22, 18]}

  8%|▊         | 79999/1000000 [1:17:20<11:48:19, 21.65it/s]global step 80000, trans_decision ep_re 5.337433040328186

{"global_step": 80000, "eval_re": [14.703072139910251, 1.2070359147175462, 
7.1368421341669395, 9.156288334861388, 2.592208433339101, -6.975344185990113, 
1.484442694781051, 9.263542248745408, 9.354478386245924, 5.451764302504366], 
"eval_len": [36, 20, 18, 22, 24, 29, 27, 32, 19, 25]}

  9%|▉         | 89997/1000000 [1:27:40<11:42:56, 21.58it/s]global step 90000, trans_decision ep_re 4.938286496857488

{"global_step": 90000, "eval_re": [10.43175609471988, -0.1365311757067329, 
2.9450952051544865, -1.078962426059416, 11.916374124111046, 2.5042210322468383, 
4.751923225392565, 11.977234222191907, 4.411028284931228, 1.6607263815930793], 
"eval_len": [35, 18, 29, 18, 24, 18, 34, 33, 15, 14]}

 10%|▉         | 99999/1000000 [1:37:50<11:27:01, 21.83it/s]global step 100000, trans_decision ep_re 5.8364065081562195

{"global_step": 100000, "eval_re": [2.917452869535907, 9.61321240906521, 
14.875085500401832, 13.095482310281309, 0.6240187911442084, 3.822775056131388, 
0.47933808029703456, 2.5773746768908854, 2.8800163034833233, 7.4793090843311], 
"eval_len": [17, 22, 25, 26, 14, 18, 13, 19, 25, 38]}

 11%|█         | 109999/1000000 [1:48:10<11:21:37, 21.76it/s]global step 110000, trans_decision ep_re 3.2098395329395273

{"global_step": 110000, "eval_re": [4.388474960196167, -0.7301809103916497, 
5.365221062972465, -0.9333016557097444, 6.713490024188022, 2.9170900508660065, 
-0.682117092991028, 5.420044201207617, 9.503461668201759, 0.13621302085566245], 
"eval_len": [16, 24, 21, 17, 18, 13, 14, 35, 17, 16]}

 12%|█▏        | 119998/1000000 [1:58:12<11:08:33, 21.94it/s]global step 120000, trans_decision ep_re 25.00532315997572

{"global_step": 120000, "eval_re": [5.675664641772542, -2.6209236472254958, 
-0.7352607839906178, -2.1127823909823733, 2.5849413834162194, 
-1.1760106114857574, -1.0853464620445479, -1.8144650911866942, 
-0.6724111182601155, 252.00982567974404], "eval_len": [22, 18, 13, 11, 25, 26, 
10, 20, 9, 157]}

 13%|█▎        | 129999/1000000 [2:08:40<11:05:27, 21.79it/s]global step 130000, trans_decision ep_re 3.89004692947857

{"global_step": 130000, "eval_re": [-1.7809645465464448, 8.580364962784714, 
2.1382443282591854, -3.0402431572003716, 4.702944344147499, 6.648377625304308, 
-2.6746827520342573, 2.3465797062619935, 1.6758241507106892, 
20.304024633098386], "eval_len": [10, 28, 26, 15, 18, 26, 32, 19, 14, 35]}

 14%|█▍        | 139998/1000000 [2:18:42<10:48:30, 22.10it/s]global step 140000, trans_decision ep_re 4.742724044169398

{"global_step": 140000, "eval_re": [0.2746748262661952, 5.1363338491025665, 
6.430756421422107, 13.306666524168357, -2.3730878797607837, 11.138268762094397, 
3.883802502604104, 13.184974191506681, 0.11500095822834201, -3.670149713937986],
"eval_len": [27, 18, 19, 25, 17, 36, 19, 24, 36, 15]}

 15%|█▍        | 149999/1000000 [2:29:10<10:43:46, 22.01it/s]global step 150000, trans_decision ep_re 6.851262853184652

{"global_step": 150000, "eval_re": [7.987252945537234, 13.776380995933964, 
1.6302000148581939, 0.6424515478963684, 4.150347163435951, 11.698126618694218, 
3.9768439552537465, 5.258305636037507, 4.378848297017697, 15.013871357181635], 
"eval_len": [23, 37, 16, 19, 18, 34, 18, 21, 21, 30]}

 16%|█▌        | 159999/1000000 [2:39:11<10:42:44, 21.78it/s]global step 160000, trans_decision ep_re 1.3810748790409235

{"global_step": 160000, "eval_re": [2.8153664555865303, -2.2012869510235338, 
2.8005003672288895, 5.418916596141508, 3.6704294534736968, -1.3404494547524533, 
6.8982361430846515, 0.08771377196197294, -3.101373952712518, 
-1.2373036385795073], "eval_len": [15, 15, 19, 18, 17, 11, 33, 28, 10, 12]}

 17%|█▋        | 169999/1000000 [2:49:40<10:35:05, 21.78it/s]global step 170000, trans_decision ep_re 4.73059077151876

{"global_step": 170000, "eval_re": [1.1427182994262353, 4.70322577277141, 
-0.14425199912877332, -1.4057180942888734, 2.568606378675969, 7.775019267686554,
12.818452467282848, 1.9084785872701364, 9.867862252480814, 8.071514783011283], 
"eval_len": [15, 23, 17, 11, 16, 30, 29, 14, 23, 20]}

 18%|█▊        | 179999/1000000 [2:59:50<10:24:37, 21.88it/s]global step 180000, trans_decision ep_re 16.44083796265226

{"global_step": 180000, "eval_re": [8.031152719716736, 5.9023317382691625, 
-1.5684615815184961, -3.1735922923979363, 0.5836932579500638, 
3.0391937619722813, 5.630136367950381, 6.445978346609902, 0.6815939167721974, 
138.83635339119832], "eval_len": [34, 18, 21, 16, 13, 19, 19, 36, 26, 118]}

 19%|█▉        | 189998/1000000 [3:09:51<10:13:00, 22.02it/s]global step 190000, trans_decision ep_re 14.500492417648047

{"global_step": 190000, "eval_re": [8.5383357433352, 1.8509153048960754, 
-0.8225419303578806, 4.035394504590158, 9.377187996021382, 59.11674883018559, 
-2.9836251058283105, -0.647997203313177, 18.695603921003958, 47.84490211594745],
"eval_len": [32, 21, 32, 26, 18, 79, 11, 10, 34, 70]}

 20%|█▉        | 199999/1000000 [3:20:20<10:12:26, 21.77it/s]global step 200000, trans_decision ep_re 4.707913107195331

{"global_step": 200000, "eval_re": [7.428977862349189, 4.098668737932379, 
1.8213465001147786, 3.4710965360537887, 2.781511536341707, 5.9637322927598655, 
5.7288879750776545, 14.991025381719362, -2.070224176723166, 2.8641084263277534],
"eval_len": [26, 16, 22, 30, 17, 17, 21, 35, 23, 17]}

 21%|██        | 209999/1000000 [3:30:30<10:04:44, 21.77it/s]global step 210000, trans_decision ep_re 8.786812365210801

{"global_step": 210000, "eval_re": [21.02704822772688, 10.60458854090847, 
3.154918663166397, -0.5247694750240974, 5.811718417841509, 16.806346091511706, 
7.735417900207281, 23.60709961590096, -4.424474266829416, 4.070229936698321], 
"eval_len": [40, 36, 14, 20, 30, 41, 28, 49, 15, 26]}

 22%|██▏       | 219998/1000000 [3:40:31<9:46:21, 22.17it/s]global step 220000, trans_decision ep_re 1.6852956436832653

{"global_step": 220000, "eval_re": [-1.583585666593267, 9.641986666810366, 
1.6040467087388075, 4.416609092967021, -0.5138491576823117, -4.0602151140443405,
-0.5298187867785145, 0.6980013550062547, 3.040641045282301, 4.139140293126335], 
"eval_len": [12, 32, 16, 15, 10, 21, 11, 24, 23, 14]}

 23%|██▎       | 229999/1000000 [3:51:00<9:47:50, 21.83it/s]global step 230000, trans_decision ep_re 3.234468115011084

{"global_step": 230000, "eval_re": [0.4047479172926738, 3.840547664620189, 
2.095846602677571, 7.693046785856641, 4.431035160197599, -2.0528850937287784, 
-1.6458635782747686, 7.934745319983462, 5.521393371662122, 4.122066999824126], 
"eval_len": [19, 34, 12, 19, 14, 32, 13, 24, 17, 18]}

 24%|██▍       | 239999/1000000 [4:01:10<9:42:00, 21.76it/s]global step 240000, trans_decision ep_re 2.189798805013358

{"global_step": 240000, "eval_re": [3.805616055286697, 0.859828223093865, 
-2.0616590279386644, 1.2185622018716833, -1.8142034383240841, 5.620521417267143,
-0.3571995821311227, 0.558511649899786, 4.719417042752157, 9.348593508356121], 
"eval_len": [25, 34, 14, 17, 13, 18, 17, 15, 17, 37]}

 25%|██▍       | 249999/1000000 [4:11:10<9:29:42, 21.94it/s]global step 250000, trans_decision ep_re 4.1547689610052485

{"global_step": 250000, "eval_re": [3.491045333693849, 1.0104538876338724, 
12.336747949377754, 6.537586016679702, 6.147616156997539, 4.4430612210199145, 
2.663579300076466, 6.374555082055119, 3.2206584315093525, -4.677613768991082], 
"eval_len": [19, 20, 27, 17, 25, 17, 18, 40, 20, 17]}

 26%|██▌       | 259998/1000000 [4:21:21<9:13:04, 22.30it/s]global step 260000, trans_decision ep_re 54.216074561575155

{"global_step": 260000, "eval_re": [1.1768499366734917, 3.9565511345308964, 
6.576582735568459, 522.8276332625927, -4.4681327455106805, 0.8265098361129258, 
2.3171245988644675, 5.542451444742655, 4.82668460004273, -1.4215091878659976], 
"eval_len": [16, 23, 22, 242, 15, 14, 17, 18, 28, 11]}

 27%|██▋       | 269998/1000000 [4:31:31<9:07:34, 22.22it/s]global step 270000, trans_decision ep_re 7.0692036384226

{"global_step": 270000, "eval_re": [-0.40079588973932, 9.786078198409548, 
5.851795699153043, 4.555219267185894, 26.14084792270504, 4.204786080846187, 
4.4435125411447665, 5.146154768435002, 6.438321988219859, 4.526115807865995], 
"eval_len": [18, 33, 20, 22, 49, 29, 18, 16, 26, 29]}

 28%|██▊       | 279998/1000000 [4:41:41<8:56:59, 22.35it/s]global step 280000, trans_decision ep_re 3.995955487786369

{"global_step": 280000, "eval_re": [1.878939635058269, 2.6124370149518223, 
5.454595578320758, 10.876202771022943, -1.1879434968970768, 
-0.22061025315543317, 8.749961736793558, 14.052763817183, 0.010850117565324058, 
-2.2676420429794817], "eval_len": [17, 14, 27, 26, 19, 16, 27, 34, 13, 10]}

 29%|██▉       | 289998/1000000 [4:51:51<8:53:01, 22.20it/s]global step 290000, trans_decision ep_re 4.20663355654783

{"global_step": 290000, "eval_re": [1.6261994730950977, 8.109500375150631, 
9.101660912926354, 5.213896233542474, 5.094382155304226, -0.14364920942847992, 
-1.1350339042107016, 5.1415952221768935, 6.995758509123917, 2.0620257977978858],
"eval_len": [16, 29, 31, 27, 20, 17, 12, 22, 19, 19]}

 30%|██▉       | 299998/1000000 [5:02:01<8:40:49, 22.40it/s]global step 300000, trans_decision ep_re 5.471801581312326

{"global_step": 300000, "eval_re": [21.72297866796914, 1.9330228860395438, 
7.842435234076729, 1.6540308656189955, -8.661757891026596, 6.919035904994432, 
-0.5391369715249337, 24.61528465268139, 6.004878080884878, -6.772755616590307], 
"eval_len": [40, 17, 20, 22, 16, 20, 16, 51, 15, 16]}

 31%|███       | 309999/1000000 [5:12:20<8:37:48, 22.21it/s]global step 310000, trans_decision ep_re 10.818661013666663

{"global_step": 310000, "eval_re": [5.215975263475124, 4.515501200078142, 
0.23986191313869512, 15.518331054489039, 27.592680155004597, 20.704352448530692,
-7.196677597071132, 3.473464171955094, 17.630565820907787, 20.49255570615858], 
"eval_len": [17, 19, 12, 35, 53, 44, 19, 16, 33, 34]}

 32%|███▏      | 319999/1000000 [5:22:30<8:32:40, 22.11it/s]global step 320000, trans_decision ep_re 38.66901965869796

{"global_step": 320000, "eval_re": [4.170615507211304, 340.673725795335, 
5.565753995836045, 7.0850119707937536, 2.31516784275491, 8.957667318091445, 
-1.6423908102580953, 4.546264866664939, 14.871814584120285, 0.1465655164300162],
"eval_len": [22, 200, 22, 27, 38, 20, 18, 18, 34, 13]}

 33%|███▎      | 329999/1000000 [5:32:40<8:25:33, 22.09it/s]global step 330000, trans_decision ep_re 4.43343344998191

{"global_step": 330000, "eval_re": [-2.4450681153938665, 14.080913911791242, 
5.325068415686956, 17.44997217776115, -3.65713243312094, 4.821726948868942, 
2.450134918177217, 1.3760871537254304, 9.340595555152621, -4.40796403282966], 
"eval_len": [9, 28, 36, 43, 24, 32, 13, 26, 31, 26]}

 34%|███▍      | 339999/1000000 [5:42:50<8:22:32, 21.89it/s]global step 340000, trans_decision ep_re 4.293552606262894

{"global_step": 340000, "eval_re": [-1.6112626124522254, 5.607033585355118, 
9.447619181834458, 2.8252626136948513, 6.61991505542828, 0.5866035947796078, 
0.4297700347763812, 4.256128590145648, 1.4251664089149343, 13.34928961015189], 
"eval_len": [14, 23, 24, 27, 26, 13, 24, 25, 31, 28]}

 35%|███▍      | 349998/1000000 [5:53:00<8:07:03, 22.24it/s]global step 350000, trans_decision ep_re 4.160731247307282

{"global_step": 350000, "eval_re": [14.30773001061938, 5.307657550835973, 
0.35857972617941, 0.5242368927457384, -0.34545253224343336, 2.861284458391066, 
0.9909177708232941, -0.37212959316126903, 10.685092342211764, 
7.289395846670898], "eval_len": [39, 24, 20, 11, 21, 19, 15, 29, 38, 28]}

 36%|███▌      | 359998/1000000 [6:02:53<7:59:15, 22.26it/s]global step 360000, trans_decision ep_re 2.9073953215272588

{"global_step": 360000, "eval_re": [3.3430945868684523, 0.5566210466442465, 
7.385611078477236, -1.6351692738228725, 3.1357480839379335, 0.3381388300302387, 
10.342679180475336, 4.3704424606952275, 1.4886322922880901, 
-0.2518450703213013], "eval_len": [17, 19, 20, 16, 12, 28, 37, 32, 13, 15]}

 37%|███▋      | 369999/1000000 [6:13:01<7:56:47, 22.02it/s]global step 370000, trans_decision ep_re 3.6412649713078338

{"global_step": 370000, "eval_re": [0.36464092618220323, 7.663296862320687, 
6.140400399182637, -3.112305767075781, 5.0435094214350045, 3.4443705804486946, 
-2.1046990249728323, 7.063693107160787, -0.11461704356229552, 
12.024360251959235], "eval_len": [16, 31, 16, 18, 35, 21, 29, 18, 13, 33]}

 38%|███▊      | 379999/1000000 [6:23:20<7:45:48, 22.18it/s]global step 380000, trans_decision ep_re 31.92314659247105

{"global_step": 380000, "eval_re": [283.1286138321195, 3.485026238581267, 
6.8501240338612055, 2.0584170931057435, 10.260793530571004, 3.153070593178795, 
1.815329925612981, 1.5682850689326329, 4.455857059373115, 2.4559485493742454], 
"eval_len": [131, 17, 20, 15, 19, 29, 19, 24, 15, 16]}

 39%|███▉      | 389999/1000000 [6:33:30<7:40:49, 22.06it/s]global step 390000, trans_decision ep_re 7.056779206168548

{"global_step": 390000, "eval_re": [10.913583137748434, 2.597392853717835, 
0.1633636303182367, 8.820123625858596, 7.1278343796208645, 3.549156109363213, 
8.065410950833703, 26.641147273024117, 0.28040055234248845, 2.4093795488580025],
"eval_len": [42, 23, 15, 23, 24, 18, 26, 47, 11, 21]}

 40%|███▉      | 399999/1000000 [6:43:40<7:33:08, 22.07it/s]global step 400000, trans_decision ep_re 2.15945248354907

{"global_step": 400000, "eval_re": [0.37763019742638726, 2.703174329746941, 
-2.6674112478095036, 3.287054774081938, 1.450124733880683, 0.4753520933271451, 
5.891138549666918, 0.14819350666815662, 9.810148249517077, 0.1191196489849608], 
"eval_len": [16, 28, 18, 30, 26, 20, 17, 14, 44, 13]}

 41%|████      | 409999/1000000 [6:53:50<7:28:35, 21.92it/s]global step 410000, trans_decision ep_re 2.388871035555595

{"global_step": 410000, "eval_re": [0.216420670819862, 0.9722509631448443, 
11.549254420500457, 2.9173088398330727, -3.2552986358664713, 6.505962345379434, 
8.564099693090006, -3.4090221205410316, -1.2916207737824625, 
1.1193549529782423], "eval_len": [11, 12, 23, 16, 11, 22, 27, 18, 13, 14]}

 42%|████▏     | 419998/1000000 [7:03:43<7:14:41, 22.24it/s]global step 420000, trans_decision ep_re 8.89199520014456

{"global_step": 420000, "eval_re": [54.685013663327545, 5.769473690347625, 
-0.513712942995115, 3.0413061346295476, 18.285044577146707, -1.2376302541798503,
10.21658151835746, 1.2231011998739814, -2.653163492050194, 0.10393790698789157],
"eval_len": [65, 22, 17, 16, 42, 17, 34, 17, 20, 17]}

 43%|████▎     | 429998/1000000 [7:13:51<7:05:36, 22.32it/s]global step 430000, trans_decision ep_re 10.656895846415372

{"global_step": 430000, "eval_re": [3.093459253735086, -0.22976664332249613, 
3.4596827521625566, 11.853155990263692, 41.11849998587518, 15.034834868177409, 
2.2050214441416824, 10.954748069457459, 12.520094184413706, 6.55922855924945], 
"eval_len": [15, 25, 24, 25, 86, 25, 24, 39, 27, 31]}

 44%|████▍     | 439999/1000000 [7:24:00<7:01:36, 22.14it/s]global step 440000, trans_decision ep_re 39.263905272144186

{"global_step": 440000, "eval_re": [3.5472090757559247, 27.510413895718838, 
-2.1435522932120152, 8.263716917897613, 6.07204290480602, 1.1035723019090746, 
6.904500316187439, 8.571627025239561, -0.5612592824106272, 333.37078185955005], 
"eval_len": [13, 42, 9, 29, 18, 11, 28, 26, 14, 208]}

 45%|████▍     | 449999/1000000 [7:34:20<6:54:21, 22.12it/s]global step 450000, trans_decision ep_re 2.133462402521156

{"global_step": 450000, "eval_re": [3.5095266454875227, 8.299422710099517, 
-1.6021707969795986, 1.410555850417314, 2.6118963327288847, 
-0.23617840213390645, 3.2992192914899188, 2.9009570781219316, -2.98160023375287,
4.122995549732849], "eval_len": [35, 19, 18, 13, 16, 13, 12, 22, 17, 14]}

 46%|████▌     | 459999/1000000 [7:44:30<6:46:53, 22.12it/s]global step 460000, trans_decision ep_re 2.0629868758830887

{"global_step": 460000, "eval_re": [4.504017395670611, 1.8757398924453754, 
-5.165221135374682, 2.501920166799266, 4.756737540059561, 1.6503432828979663, 
1.318976494514863, -1.3466558115168734, 0.37100728771214964, 
10.163003645622652], "eval_len": [28, 15, 19, 26, 16, 29, 17, 29, 12, 27]}

 47%|████▋     | 469997/1000000 [7:54:24<6:40:18, 22.07it/s]global step 470000, trans_decision ep_re 3.6193391843898945

{"global_step": 470000, "eval_re": [8.307739263851262, 1.6340220987563723, 
16.10871817012445, -0.0714702354133761, 5.852954437240631, -1.3495136360685707, 
-3.799115076226547, 1.3722121847279949, 6.374483597115084, 1.7633610397916426], 
"eval_len": [18, 24, 35, 11, 16, 27, 27, 16, 24, 11]}

 48%|████▊     | 479998/1000000 [8:04:31<6:29:56, 22.23it/s]global step 480000, trans_decision ep_re 2.001463356247954

{"global_step": 480000, "eval_re": [-0.18738023974714824, 3.768550228137624, 
0.7123344619120816, 6.145178328541347, 0.9385312370246593, 3.980581470328188, 
-1.4181373855878212, 5.4147174224521555, -0.45889529387609806, 
1.1191533332945494], "eval_len": [23, 16, 26, 26, 17, 14, 14, 15, 11, 12]}

 49%|████▉     | 489999/1000000 [8:14:50<6:27:00, 21.96it/s]global step 490000, trans_decision ep_re 4.369683273084259

{"global_step": 490000, "eval_re": [5.742855425674659, -2.2460231786978593, 
-0.4290072080792634, 0.8709357255691066, 41.728109440751474, -5.823469777323714,
2.6337397929259794, 2.7268697634513575, 1.4584355787692318, 
-2.9656128321983823], "eval_len": [17, 12, 11, 14, 82, 27, 12, 14, 29, 11]}

 50%|████▉     | 499999/1000000 [8:25:00<6:18:12, 22.03it/s]global step 500000, trans_decision ep_re 5.913629780480327

{"global_step": 500000, "eval_re": [-1.899321459212338, 2.822274137079496, 
7.138420426862093, 5.045448229605937, 11.890865733729042, 4.354039745934906, 
0.11187455912728195, 17.73788616038469, 12.218735098859087, 
-0.2839248275669174], "eval_len": [9, 18, 18, 24, 25, 24, 15, 28, 25, 26]}

 51%|█████     | 509997/1000000 [8:35:10<6:09:37, 22.09it/s]global step 510000, trans_decision ep_re 2.948657027697594

{"global_step": 510000, "eval_re": [2.289929715474655, -2.482219742337971, 
3.8140572405863558, 2.116841670651939, 12.096140315831851, -1.9979369673825471, 
0.5212442856343011, -6.227919145610945, 13.12102163400617, 6.235411270122128], 
"eval_len": [19, 12, 17, 14, 32, 20, 18, 18, 27, 32]}

 52%|█████▏    | 519997/1000000 [8:45:02<6:00:45, 22.18it/s]global step 520000, trans_decision ep_re 5.78345528159344

{"global_step": 520000, "eval_re": [0.819466929203066, 4.044888259944768, 
-0.304325067764709, 3.956530043906072, 19.80236795717061, 4.410255770225511, 
9.930677912987404, 11.834847624398776, 5.7551985063775435, -2.415355120514646], 
"eval_len": [20, 28, 10, 17, 30, 20, 21, 28, 27, 35]}

 53%|█████▎    | 529999/1000000 [8:55:10<5:55:02, 22.06it/s]global step 530000, trans_decision ep_re 3.6579269347392214

{"global_step": 530000, "eval_re": [1.8279074447968766, 4.931426173883723, 
3.4919365352127243, 0.20012077886733282, 6.040364555778309, 2.46333726193516, 
4.8767538956184175, 8.119382410036573, 1.4016796985204631, 3.226360592742633], 
"eval_len": [12, 20, 18, 16, 23, 14, 19, 19, 11, 19]}

 54%|█████▍    | 539999/1000000 [9:05:30<5:47:56, 22.03it/s]global step 540000, trans_decision ep_re 7.477861706745415

{"global_step": 540000, "eval_re": [4.331283912057547, 65.34623975798847, 
-2.1008769429656633, 7.715830961017033, -0.020147902498978687, 
-0.10277545568105748, -2.6418066529382913, -2.3901604439877615, 
6.253942996638648, -1.6129131621757904], "eval_len": [21, 95, 14, 25, 24, 16, 
21, 17, 21, 18]}

 55%|█████▍    | 549999/1000000 [9:15:40<5:39:50, 22.07it/s]global step 550000, trans_decision ep_re 6.024777226558425

{"global_step": 550000, "eval_re": [7.214802778011038, 30.42597281349562, 
5.8074718058641315, -0.4899647305177458, 3.865900821629194, 6.583028856749837, 
1.3429511627899244, 6.993355868924416, -0.5414912474094686, 
-0.9542558639527013], "eval_len": [17, 52, 16, 18, 18, 26, 15, 27, 19, 27]}

 56%|█████▌    | 559999/1000000 [9:25:50<5:33:08, 22.01it/s]global step 560000, trans_decision ep_re 6.760116155688651

{"global_step": 560000, "eval_re": [4.798918565247471, 25.149085893729204, 
-3.070471370224494, 24.658549618912094, 3.13937836262815, 3.5330854624842276, 
3.531411807554673, -0.5809029748636103, 5.333134956771485, 1.1089712346473057], 
"eval_len": [15, 44, 21, 47, 14, 19, 16, 15, 35, 12]}

 57%|█████▋    | 569999/1000000 [9:36:00<5:24:28, 22.09it/s]global step 570000, trans_decision ep_re 2.6728177591848596

{"global_step": 570000, "eval_re": [4.0118590762088315, 1.9001359118640773, 
1.7073725127290225, 2.8344120054845034, -1.5667200209650536, 2.032211858173904, 
8.907939692018918, 7.797803115082698, -0.4624372371838785, 
-0.43439932156442673], "eval_len": [15, 21, 20, 29, 23, 14, 23, 18, 20, 18]}

 58%|█████▊    | 579998/1000000 [9:46:10<5:14:57, 22.23it/s]global step 580000, trans_decision ep_re 6.11094952973418

{"global_step": 580000, "eval_re": [0.57981407947056, 0.01132704613451846, 
8.34815751160144, 18.542522603987187, 4.091309545655939, 1.574356585200525, 
4.846392175512616, 14.951348596661704, 0.7834406819532967, 7.3808264711640135], 
"eval_len": [13, 12, 20, 47, 15, 16, 20, 45, 15, 17]}

 59%|█████▉    | 589997/1000000 [9:56:03<5:11:43, 21.92it/s]global step 590000, trans_decision ep_re 5.401105851397149

{"global_step": 590000, "eval_re": [9.942904121890825, 3.059409762548209, 
-3.9757823557935557, 5.1385661321316265, 0.18935053674587177, 
12.288165270723523, 6.985165001317565, 5.5900153113352555, 6.141744069500298, 
8.651520663571873], "eval_len": [27, 15, 17, 32, 24, 24, 18, 20, 19, 20]}

 60%|█████▉    | 599997/1000000 [10:06:12<4:59:38, 22.25it/s]global step 600000, trans_decision ep_re 6.960161228868955

{"global_step": 600000, "eval_re": [10.417262560032091, 1.2344129233275964, 
7.695445339089353, 15.837636354341278, 6.712145969189406, 4.273161943156677, 
-0.6347420501100761, 17.909710448534106, 1.8040200641821569, 4.352558736946964],
"eval_len": [25, 12, 22, 38, 32, 26, 19, 36, 13, 19]}

 61%|██████    | 609999/1000000 [10:16:20<4:54:42, 22.06it/s]global step 610000, trans_decision ep_re 3.9404454361908217

{"global_step": 610000, "eval_re": [12.362946373046007, 2.492789846186605, 
0.10542223951128088, -0.531964972246175, 12.160144428454041, -2.096682924531543,
0.8123533364045793, 12.631794576862237, -0.037423056094815, 1.5050745143160025],
"eval_len": [23, 16, 12, 12, 30, 27, 33, 35, 10, 15]}

 62%|██████▏   | 619999/1000000 [10:26:40<4:46:57, 22.07it/s]global step 620000, trans_decision ep_re 54.3648642633129

{"global_step": 620000, "eval_re": [166.9978988659145, 9.677812809364827, 
0.28348035486126943, 4.744117837485296, 2.752072331091319, 340.7139222417436, 
7.7347413397463445, 0.3846143410490778, 5.117760037583717, 5.242222474289069], 
"eval_len": [100, 34, 23, 19, 24, 222, 23, 22, 20, 23]}

 63%|██████▎   | 629999/1000000 [10:36:50<4:38:53, 22.11it/s]global step 630000, trans_decision ep_re 27.38138385296195

{"global_step": 630000, "eval_re": [10.31968973678672, 19.113711747593324, 
0.7254663567905647, 10.980305651717556, 1.1615998433838581, 7.190638183196678, 
-3.918510488564957, 222.60722718617384, 3.6291226807589667, 2.0045876317829348],
"eval_len": [24, 35, 19, 34, 15, 30, 11, 151, 20, 16]}

 64%|██████▍   | 639999/1000000 [10:47:00<4:30:26, 22.19it/s]global step 640000, trans_decision ep_re 12.308887009169473

{"global_step": 640000, "eval_re": [1.7330431273845592, -1.16856337437153, 
-0.45453351693148114, 26.375225265921472, 6.219449643466849, 3.002371173302067, 
1.0053553577215735, 62.433994102133326, 1.168429691822425, 22.774098621245464], 
"eval_len": [11, 17, 12, 47, 56, 25, 15, 75, 14, 38]}

 65%|██████▍   | 649999/1000000 [10:57:10<4:25:11, 22.00it/s]global step 650000, trans_decision ep_re 10.520605869423738

{"global_step": 650000, "eval_re": [9.442332365680391, 3.8269528544013234, 
17.92786606379537, 4.188833416950919, 14.83342602673257, 2.5478289666939364, 
35.18520468667198, 2.694771505212751, 9.10480379843739, 5.454039009660758], 
"eval_len": [32, 17, 54, 17, 26, 13, 106, 25, 17, 21]}

 66%|██████▌   | 659997/1000000 [11:07:20<4:16:41, 22.08it/s]global step 660000, trans_decision ep_re 3.682439669299819

{"global_step": 660000, "eval_re": [-0.21355961483261193, 8.626269773797357, 
20.502720017474765, -1.4100084569038165, 0.8756696384182677, 6.617168729994711, 
1.352528714710827, -2.4651756418132584, -2.6739037226011813, 5.612687254753135],
"eval_len": [25, 35, 44, 15, 12, 16, 13, 25, 12, 21]}

 67%|██████▋   | 669999/1000000 [11:17:13<4:10:36, 21.95it/s]global step 670000, trans_decision ep_re 7.9152818266943665

{"global_step": 670000, "eval_re": [52.79534791803533, -2.214505012013934, 
1.0915752620555403, 2.92477487091666, 0.6092349836023621, 3.0910745689123558, 
3.5251548040148815, 13.433863654459204, 4.204583538296246, 
-0.30828632133498945], "eval_len": [77, 11, 12, 18, 10, 15, 30, 38, 25, 15]}

 68%|██████▊   | 679998/1000000 [11:27:21<4:00:22, 22.19it/s]global step 680000, trans_decision ep_re 6.066789805931728

{"global_step": 680000, "eval_re": [3.3678810782929274, 3.261534525005393, 
8.640468988546502, 18.721992398893136, 8.654421545685775, 6.5243408252186965, 
5.373808855966005, -1.8476363235288873, 3.1793187901763162, 4.79176737506141], 
"eval_len": [13, 29, 24, 47, 19, 27, 27, 12, 27, 31]}

 69%|██████▉   | 689999/1000000 [11:37:40<3:54:29, 22.03it/s]global step 690000, trans_decision ep_re 28.235728492745658

{"global_step": 690000, "eval_re": [9.222864964648334, 2.4199866190312576, 
8.042359652757382, 191.58549353410694, 8.000499794290056, 18.626485053660094, 
42.76581873713171, 1.2891125563056443, -1.1994321608726057, 1.6040961763977593],
"eval_len": [36, 14, 32, 125, 32, 39, 61, 17, 12, 14]}

 70%|██████▉   | 699999/1000000 [11:47:50<3:45:51, 22.14it/s]global step 700000, trans_decision ep_re 2.546179945420586

{"global_step": 700000, "eval_re": [13.540920737862692, -2.796847257335866, 
5.528445732122025, 4.635904052348884, 2.7705242552080964, 0.2262741445885972, 
0.2551843927501648, 0.576837344789004, 4.333486802520383, -3.6089307506481205], 
"eval_len": [31, 16, 25, 28, 13, 14, 15, 13, 26, 17]}

 71%|███████   | 709999/1000000 [11:58:00<3:38:44, 22.10it/s]global step 710000, trans_decision ep_re 8.585073069525446

{"global_step": 710000, "eval_re": [1.2877222564975848, 3.4110753310222854, 
4.958761502157078, 5.363808306072864, 4.011779252688047, 21.414646309146534, 
0.24759980499693676, 5.338275083597871, 0.142491850679768, 39.67457099839549], 
"eval_len": [28, 17, 15, 17, 26, 38, 26, 29, 11, 46]}

 72%|███████▏  | 719999/1000000 [12:08:10<3:31:20, 22.08it/s]global step 720000, trans_decision ep_re 5.758839585657715

{"global_step": 720000, "eval_re": [7.60805626144486, 17.001187090610067, 
1.6356619858285184, 2.7100271619816545, 1.3296870677255461, 7.845224088789789, 
-0.45432846104802005, 8.923122892507127, 3.615194286972758, 7.374563481764847], 
"eval_len": [21, 37, 13, 12, 16, 21, 10, 24, 17, 28]}

 73%|███████▎  | 729998/1000000 [12:18:04<3:21:31, 22.33it/s]global step 730000, trans_decision ep_re 6.629189173276814

{"global_step": 730000, "eval_re": [9.888547217458168, 4.895966639808102, 
21.97164777474584, 2.744397148806214, -0.5368456592696309, 9.365186788673777, 
7.641339100034833, 2.171535103954712, 5.425229442225193, 2.7248881763309276], 
"eval_len": [23, 22, 72, 15, 12, 19, 27, 12, 20, 17]}

 74%|███████▍  | 739999/1000000 [12:28:13<3:15:44, 22.14it/s]global step 740000, trans_decision ep_re 9.359704281558601

{"global_step": 740000, "eval_re": [3.393802235829875, -1.4260383896114135, 
8.040742594767067, -1.8068073198616976, 2.6062797416566426, 7.3604492887434105, 
8.053342546590436, 55.34644383823044, 10.376967092325872, 1.6518611869153736], 
"eval_len": [16, 19, 31, 25, 23, 17, 24, 108, 24, 14]}

 75%|███████▍  | 749997/1000000 [12:38:22<3:10:16, 21.90it/s]global step 750000, trans_decision ep_re 4.232066184612464

{"global_step": 750000, "eval_re": [3.5084888375168912, 2.8067402299325606, 
2.1217132186178573, 9.95667435588106, 6.236315625252347, 6.111100904453912, 
4.192489971768966, -2.0452224222848385, 8.131927716184169, 1.3004334088017124], 
"eval_len": [32, 14, 19, 23, 19, 18, 17, 11, 25, 16]}

 76%|███████▌  | 759999/1000000 [12:48:30<3:00:08, 22.20it/s]global step 760000, trans_decision ep_re 7.1714206067997806

{"global_step": 760000, "eval_re": [11.682108471358147, 3.0629884329649433, 
-0.3641630789523648, 26.67260318759868, 6.30703352796845, 2.293988185875509, 
10.350553294813533, 7.975317628605722, 0.14222917587101191, 3.5915472418941743],
"eval_len": [29, 17, 13, 51, 16, 13, 31, 32, 16, 22]}

 77%|███████▋  | 769999/1000000 [12:58:50<2:53:15, 22.12it/s]global step 770000, trans_decision ep_re 5.137485274015561

{"global_step": 770000, "eval_re": [3.3071207590771903, 6.367530541059915, 
3.522812914819744, 7.676467363476748, 4.0901342193629775, -1.7277816895172489, 
10.717084148560073, 9.800432896377023, -0.014694571942098885, 
7.635746158881291], "eval_len": [30, 32, 18, 24, 19, 21, 26, 25, 10, 35]}

 78%|███████▊  | 779999/1000000 [13:09:00<2:46:22, 22.04it/s]global step 780000, trans_decision ep_re 7.6376268869137744

{"global_step": 780000, "eval_re": [13.220181940730182, 0.8572832228619274, 
17.763313124802735, 1.8901642211775702, 3.046430942872042, 26.729714242776577, 
-4.32037367658323, 11.186182325271764, -0.7034567289954944, 6.706829254223681], 
"eval_len": [24, 17, 33, 32, 31, 114, 11, 21, 17, 20]}

 79%|███████▉  | 789999/1000000 [13:19:10<2:39:21, 21.96it/s]global step 790000, trans_decision ep_re 4.685924625840928

{"global_step": 790000, "eval_re": [0.1932958790143201, 9.289530925531418, 
6.804541310924221, 0.2865385295307441, -1.2931364154141898, 10.36711129924486, 
6.713845083884108, -0.2004835169896507, 11.089611478568312, 3.6083916841151504],
"eval_len": [12, 32, 18, 14, 13, 34, 17, 27, 25, 28]}

 80%|███████▉  | 799997/1000000 [13:29:20<2:31:10, 22.05it/s]global step 800000, trans_decision ep_re 9.854172806140816

{"global_step": 800000, "eval_re": [48.10784120970628, -1.6249074114365598, 
10.632726682462609, 9.363654708104567, 18.542349315453805, 2.6698993676274387, 
3.5427615611669796, 3.340229839556184, 0.870113222471871, 3.0970595662949942], 
"eval_len": [90, 16, 28, 18, 32, 22, 12, 28, 15, 21]}

 81%|████████  | 809997/1000000 [13:39:12<2:23:38, 22.05it/s]global step 810000, trans_decision ep_re 32.323457093008486

{"global_step": 810000, "eval_re": [1.8836178905165517, 1.2137184434349908, 
297.14777627536694, 2.5425546591733355, -0.15657049823731922, 8.459119923264238,
6.170309127021558, 3.5275839887383773, 5.503819132247135, -3.057358011441025], 
"eval_len": [14, 14, 171, 22, 18, 21, 19, 18, 17, 11]}

 82%|████████▏ | 819999/1000000 [13:49:30<2:15:39, 22.11it/s]global step 820000, trans_decision ep_re 6.458934493194559

{"global_step": 820000, "eval_re": [4.317913615055902, 0.5394247643841859, 
9.406253863125315, -1.3905179246548418, 0.45534174767476343, 18.48544008300573, 
4.238823283577308, 0.22897675872772322, 17.924274032014793, 10.383414709034716],
"eval_len": [21, 18, 23, 13, 11, 39, 33, 12, 29, 22]}

 83%|████████▎ | 829999/1000000 [13:59:40<2:08:24, 22.07it/s]global step 830000, trans_decision ep_re 4.551540643194672

{"global_step": 830000, "eval_re": [9.452504441925122, 2.706247663495268, 
-0.36104415008424884, 4.600407479372279, 16.71368803242617, 
-0.44859238844585464, -1.468409880987083, 6.577935112616678, 5.110652973583048, 
2.6320171480453274], "eval_len": [27, 24, 16, 21, 28, 24, 16, 20, 34, 25]}

 84%|████████▍ | 839999/1000000 [14:09:50<2:01:11, 22.00it/s]global step 840000, trans_decision ep_re 4.58041757393634

{"global_step": 840000, "eval_re": [12.079299252457105, 9.776946868411445, 
0.5484075247903836, 2.9169066554136918, -0.6205700839844195, 3.966388299153554, 
0.5720677514776857, 4.010352035358173, 2.420960894608603, 10.133416541677178], 
"eval_len": [25, 21, 21, 16, 26, 29, 20, 31, 18, 21]}

 85%|████████▍ | 849999/1000000 [14:20:00<1:53:24, 22.04it/s]global step 850000, trans_decision ep_re 6.616063400079838

{"global_step": 850000, "eval_re": [4.751219351089597, 1.5935311589953882, 
1.3977200000007441, 5.867648021573528, -0.955163762731834, 11.682424456397891, 
5.431225303632679, 30.245964141850084, 7.218278950208211, -1.0722136202179016], 
"eval_len": [29, 13, 13, 17, 25, 29, 16, 98, 31, 18]}

 86%|████████▌ | 859999/1000000 [14:30:10<1:46:01, 22.01it/s]global step 860000, trans_decision ep_re 3.08227790783076

{"global_step": 860000, "eval_re": [4.813959278059213, 0.9541361877060694, 
4.050715588034431, -3.4376792586024236, 5.0485612111013785, 6.2666416051433815, 
7.121020772047748, 3.5922498765318185, 0.057532160983904165, 2.355641657302078],
"eval_len": [29, 12, 17, 17, 21, 18, 17, 15, 25, 15]}

 87%|████████▋ | 869999/1000000 [14:40:03<1:38:37, 21.97it/s]global step 870000, trans_decision ep_re 3.4278856862352285

{"global_step": 870000, "eval_re": [4.9512113573816, 2.840740663876335, 
6.459282192416023, -5.304232841433046, 3.4109354703497443, 6.50401647775249, 
-1.8626575155073102, 1.0485187143722126, 5.17422853732192, 11.056813805822316], 
"eval_len": [22, 18, 18, 14, 13, 24, 14, 11, 21, 30]}

 88%|████████▊ | 879999/1000000 [14:50:11<1:30:52, 22.01it/s]global step 880000, trans_decision ep_re 11.492929641349942

{"global_step": 880000, "eval_re": [16.997930630541358, 6.580128812432033, 
5.826028471422241, 1.554317483582277, -0.42627407242211673, 14.31370292206631, 
8.567525829332444, 34.93247008595031, 19.96420917439706, 6.619257076197496], 
"eval_len": [32, 28, 22, 15, 19, 32, 24, 100, 33, 19]}

 89%|████████▉ | 889999/1000000 [15:00:30<1:23:33, 21.94it/s]global step 890000, trans_decision ep_re 56.69896617570517

{"global_step": 890000, "eval_re": [9.744506722241454, 7.6283082685691985, 
0.7562184862898791, 501.53257051058574, 4.364658853222443, 6.483689477708776, 
9.865084711997577, 3.288180718016307, 8.291017141852027, 15.035426866568303], 
"eval_len": [21, 19, 26, 191, 30, 19, 19, 15, 24, 37]}

 90%|████████▉ | 899999/1000000 [15:10:40<1:16:33, 21.77it/s]global step 900000, trans_decision ep_re 6.791840882574666

{"global_step": 900000, "eval_re": [28.035075811883246, 8.369267413330206, 
0.9495263601616784, -2.748660574431904, 4.888551828874989, 8.326368043168106, 
1.856882783821994, 4.8424202172841495, 10.012781784185904, 3.386195157468305], 
"eval_len": [47, 31, 14, 32, 22, 59, 18, 31, 36, 20]}

 91%|█████████ | 909999/1000000 [15:21:00<1:09:02, 21.73it/s]global step 910000, trans_decision ep_re 4.438273755442166

{"global_step": 910000, "eval_re": [7.370127886896174, 3.366490927130262, 
13.264376578309935, 0.07394166330518723, 3.961925712343965, 6.33877200511774, 
1.7316443396137329, 2.2712509704012915, 2.1977618676743944, 3.806445603628981], 
"eval_len": [19, 16, 40, 23, 16, 21, 34, 14, 17, 17]}

 92%|█████████▏| 919999/1000000 [15:31:01<1:01:41, 21.61it/s]global step 920000, trans_decision ep_re 2.7155741146547085

{"global_step": 920000, "eval_re": [0.5022149429565937, 1.3227382879157397, 
4.782904713901556, 7.748102040139847, 6.498445330629346, 6.467283153098466, 
2.462836044976531, 0.7211703516391046, 0.7159556000750359, -4.065909318785129], 
"eval_len": [17, 12, 36, 19, 33, 21, 20, 12, 20, 23]}

 93%|█████████▎| 929999/1000000 [15:41:31<53:33, 21.78it/s]global step 930000, trans_decision ep_re 7.993186317323297

{"global_step": 930000, "eval_re": [-0.8522491820786189, -1.7058207199412396, 
2.6204687863973453, 43.75780998583346, 0.9798829286774509, 9.144974975346384, 
4.437438531737868, 10.932728221973338, 3.5472201033858464, 7.069409541901134], 
"eval_len": [10, 15, 19, 76, 14, 25, 13, 21, 19, 27]}

 94%|█████████▍| 939998/1000000 [15:51:51<45:38, 21.91it/s]global step 940000, trans_decision ep_re 4.290756606668684

{"global_step": 940000, "eval_re": [5.634126845281326, 3.6254160801810005, 
6.370970448352784, -3.7796473409021125, 15.09033367686759, 5.694032631183777, 
5.944809490012209, 2.267150961103599, 4.323752328388819, -2.2633790537821534], 
"eval_len": [18, 33, 17, 19, 32, 16, 20, 16, 13, 10]}

 95%|█████████▍| 949999/1000000 [16:01:51<38:14, 21.79it/s]global step 950000, trans_decision ep_re 4.73079243585485

{"global_step": 950000, "eval_re": [4.80060910830654, 3.590796402279146, 
6.577229153869823, 4.322788354753476, -0.7117536083172675, 9.936738353108149, 
8.715530219088892, 8.809389032417013, 1.1327605404060277, 0.13383680263670975], 
"eval_len": [27, 33, 18, 16, 22, 25, 29, 28, 18, 18]}

 96%|█████████▌| 959999/1000000 [16:12:21<30:33, 21.82it/s]global step 960000, trans_decision ep_re 2.2719077905573135

{"global_step": 960000, "eval_re": [2.699217562107648, 6.649435666431205, 
6.322298872939536, -3.8839037142730373, 1.962304505044858, -0.15660726153375581,
1.0415530256205459, 1.706863583897202, 2.399648751443515, 3.9782669138954168], 
"eval_len": [14, 31, 18, 11, 24, 11, 15, 13, 13, 18]}

 97%|█████████▋| 969999/1000000 [16:22:41<22:49, 21.91it/s]global step 970000, trans_decision ep_re 6.8489721708039495

{"global_step": 970000, "eval_re": [10.19465743732977, 4.699107307288123, 
4.64112103300942, 4.482033465744511, 1.627611420889648, -2.2031407998926946, 
10.467105274237797, 0.32115245457854835, 4.368085564655824, 29.89198855019856], 
"eval_len": [28, 16, 18, 32, 12, 11, 22, 23, 20, 48]}

 98%|█████████▊| 979999/1000000 [16:32:51<14:55, 22.34it/s]global step 980000, trans_decision ep_re 4.798671687339048

{"global_step": 980000, "eval_re": [0.538953198478946, 4.303822367755934, 
2.5948036910733308, 18.895918391193174, 7.175522067559144, -2.016108979880858, 
8.317376085567428, 0.7624227429106167, -0.08041654277401342, 7.494423851506771],
"eval_len": [30, 27, 21, 33, 32, 11, 21, 13, 15, 17]}

 99%|█████████▉| 989999/1000000 [16:42:41<07:43, 21.59it/s]global step 990000, trans_decision ep_re 4.5172071510156275

{"global_step": 990000, "eval_re": [6.072479868570347, -2.697666764870574, 
5.986776480182018, -0.6643294197499667, 7.854493513667744, 0.1845890988502975, 
8.135330662147895, 3.9951307811330614, 0.17183840615369028, 16.133428884071762],
"eval_len": [26, 27, 18, 20, 34, 29, 24, 18, 15, 48]}

100%|█████████▉| 999999/1000000 [16:53:11<00:00, 21.77it/s]global step 1000000, trans_decision ep_re 1.360732616886557

{"global_step": 1000000, "eval_re": [2.0137184304144697, 2.4930141245421558, 
2.125671470865546, -1.3676866558339986, -2.939918263350951, 0.7463463413906479, 
2.480672167023429, -2.119539824744758, 6.966133880197691, 3.208914498361338], 
"eval_len": [15, 32, 12, 26, 9, 13, 25, 12, 21, 24]}

100%|██████████| 1000000/1000000 [16:53:13<00:00, 16.45it/s]
