
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.25
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9999/1000000 [03:40<8:34:21, 32.08it/s]global step 10000, trans_decision ep_re -85.45533212868727

{"global_step": 10000, "eval_re": [9.204062636155424, -2.0645059545481645, 
5.753728175958986, -25.732894858369143, 2.4536668118015488, -169.05651737741056,
1.213761338619256, -41.471155050860126, -366.8411633110441, 
-268.01230369717587], "eval_len": [43, 67, 48, 137, 23, 1000, 43, 296, 1000, 
1000]}

  2%|▏         | 19997/1000000 [10:21<8:29:25, 32.06it/s]global step 20000, trans_decision ep_re -41.48550352341779

{"global_step": 20000, "eval_re": [-5.916600714682073, 23.166319590486218, 
-17.333674599024608, 6.690563644252838, 16.60523082357625, -11.843097791041169, 
-409.42625297943215, -14.096260817452107, 7.615192937817673, 
-10.316455328678742], "eval_len": [39, 60, 91, 20, 43, 45, 1000, 50, 18, 34]}

  3%|▎         | 29999/1000000 [17:20<8:24:20, 32.05it/s]global step 30000, trans_decision ep_re -76.22128765941439

{"global_step": 30000, "eval_re": [-21.789971295826216, -141.09962132491594, 
-204.91227360237852, -36.638429953360266, -59.331234228480206, 
-9.255356951503735, -119.45107173555542, -97.96673444709978, 
-45.187758700009944, -26.580424355013918], "eval_len": [87, 1000, 1000, 95, 172,
43, 178, 259, 87, 112]}

  4%|▍         | 39997/1000000 [24:01<8:18:11, 32.12it/s]global step 40000, trans_decision ep_re -52.78228315828792

{"global_step": 40000, "eval_re": [-19.300132251013736, -145.17335999323672, 
-119.96182081118158, -44.895727457707096, -56.86189690900023, 5.914117210354441,
-23.982791693326927, -32.08853606633787, -78.50595457614816, 
-12.966729035281382], "eval_len": [70, 1000, 219, 143, 256, 21, 45, 102, 205, 
183]}

  5%|▍         | 49999/1000000 [31:10<8:13:06, 32.11it/s]global step 50000, trans_decision ep_re -45.43849800924387

{"global_step": 50000, "eval_re": [-8.64962920307301, -115.64891006403587, 
7.614546775634924, -127.64953980201334, -63.52688749000947, -55.5272559603662, 
-0.08612384729843581, -17.003947920568816, -8.525171765967062, 
-65.3820608147415], "eval_len": [58, 1000, 18, 1000, 184, 142, 43, 72, 45, 325]}

  6%|▌         | 59997/1000000 [38:00<8:03:38, 32.39it/s]global step 60000, trans_decision ep_re -85.10295145116535

{"global_step": 60000, "eval_re": [-215.14968429908723, -34.66868239870874, 
14.470492910512288, -71.4615066263541, -4.235310136300793, -299.54500194121204, 
-9.683584393518048, -91.41320929002616, -21.46138437643288, -117.8816439605259],
"eval_len": [1000, 55, 89, 585, 53, 1000, 63, 355, 74, 1000]}

  7%|▋         | 69997/1000000 [45:00<8:08:20, 31.74it/s]global step 70000, trans_decision ep_re -62.37745769682674

{"global_step": 70000, "eval_re": [-156.24720775542954, -259.26443856991125, 
8.536743636695151, -46.75179249563437, -22.729899972938703, 6.8347997735539145, 
-12.333315015706434, 9.351725774325576, -137.4350812424616, 
-13.736111100760079], "eval_len": [1000, 1000, 23, 122, 27, 97, 147, 66, 1000, 
38]}

  8%|▊         | 79997/1000000 [51:50<7:53:58, 32.35it/s]global step 80000, trans_decision ep_re -24.70369771391376

{"global_step": 80000, "eval_re": [-2.106140820311948, -118.1897194968216, 
-7.508613668866769, 10.096314966242865, -46.862177573034245, 26.16367689874321, 
-56.92737924187415, -106.61948967337435, 28.043456941028282, 26.87309452913112],
"eval_len": [38, 1000, 145, 52, 1000, 218, 552, 1000, 57, 38]}

  9%|▉         | 89997/1000000 [58:30<7:56:18, 31.84it/s]global step 90000, trans_decision ep_re -13.702327011164154

{"global_step": 90000, "eval_re": [-23.501427842958563, 3.8061277817140695, 
17.47944121627988, -47.196696448695874, -8.093254421276072, -15.607856019214127,
-9.56984107567169, -24.07813818979693, 17.665047362591576, -47.926672474613824],
"eval_len": [287, 49, 30, 141, 434, 28, 31, 58, 125, 283]}

 10%|▉         | 99999/1000000 [1:05:30<7:47:18, 32.10it/s]global step 100000, trans_decision ep_re -14.460148433142635

{"global_step": 100000, "eval_re": [-156.2756307418039, -54.0260253994276, 
-27.408439592220546, 4.552511063697734, -8.667016133254942, -9.319450655812664, 
19.86214593979641, 29.211112008549904, -3.8152994402780234, 61.28460861932725], 
"eval_len": [474, 382, 37, 65, 64, 219, 54, 343, 14, 435]}

 11%|█         | 109997/1000000 [1:12:06<7:41:33, 32.14it/s]global step 110000, trans_decision ep_re -8.370153750488672

{"global_step": 110000, "eval_re": [-35.713838115651804, -5.604030560814235, 
-25.583545808575277, -16.255279627032824, 11.590713224486858, 
0.7339941306016735, 19.147623350949786, 1.0418963586520487, -3.4681043069321045,
-29.59096615057083], "eval_len": [66, 101, 150, 71, 33, 70, 25, 391, 26, 51]}

 12%|█▏        | 119999/1000000 [1:19:10<7:37:41, 32.05it/s]global step 120000, trans_decision ep_re -25.668816211232503

{"global_step": 120000, "eval_re": [-8.019838033088883, -110.84781548803213, 
141.32575770168742, -7.986558774903034, -79.03821911258422, -99.9741764926358, 
6.051051292044147, -3.2060918509353584, 26.781423420317385, -121.7736947741945],
"eval_len": [25, 1000, 691, 29, 1000, 1000, 43, 73, 85, 1000]}

 13%|█▎        | 129997/1000000 [1:26:00<7:36:06, 31.79it/s]global step 130000, trans_decision ep_re -2.01836637131182

{"global_step": 130000, "eval_re": [27.00969215708475, 22.969555754088507, 
-30.007543067782073, 103.81178121478564, -81.92894874052229, -39.03190253005234,
-1.6176132471075484, -34.06366401024162, -22.031793897047013, 
34.70677265367579], "eval_len": [331, 64, 209, 494, 1000, 1000, 26, 1000, 284, 
155]}

 14%|█▍        | 139997/1000000 [1:33:00<7:25:53, 32.15it/s]global step 140000, trans_decision ep_re 3.542525015413797

{"global_step": 140000, "eval_re": [-47.262317976712325, 35.62360905596645, 
1.8556450951301804, -22.5674372132797, -22.59044406964979, 1.9159459231708011, 
13.686054336131836, 38.91731759489268, -3.6994649238153277, 39.54634233230317], 
"eval_len": [1000, 111, 143, 112, 1000, 19, 59, 94, 113, 216]}

 15%|█▍        | 149997/1000000 [1:39:50<7:21:57, 32.05it/s]global step 150000, trans_decision ep_re -6.323457233448785

{"global_step": 150000, "eval_re": [19.07953835246467, 37.145749486742005, 
-6.386487964072522, 2.2638278793213424, 50.231041210499775, -106.26364499294264,
0.36322406598363477, 10.407083660217626, -19.5245083068608, -50.55039572584093],
"eval_len": [56, 64, 15, 168, 120, 1000, 81, 154, 250, 268]}

 16%|█▌        | 159997/1000000 [1:46:40<7:11:20, 32.46it/s]global step 160000, trans_decision ep_re 4.780607551922344

{"global_step": 160000, "eval_re": [-38.6440421177837, -19.823840795678315, 
-41.30315869030786, 195.2846389767666, -108.24642572368249, 20.927612881582718, 
-18.53500491989573, 72.47120462954689, 1.0857837564092196, -15.410692477733878],
"eval_len": [1000, 73, 220, 629, 1000, 191, 38, 262, 96, 32]}

 17%|█▋        | 169997/1000000 [1:53:30<7:05:30, 32.51it/s]global step 170000, trans_decision ep_re -7.6655693266771125

{"global_step": 170000, "eval_re": [16.696865784305658, 14.086273163680584, 
-94.62366918707455, -28.37943656370743, 37.28701188613235, -0.4640988218647868, 
-86.548532745618, -0.8561998421694941, 31.370312109314185, 34.77578095023036], 
"eval_len": [20, 32, 1000, 162, 28, 193, 1000, 22, 460, 110]}

 18%|█▊        | 179997/1000000 [2:00:20<6:59:55, 32.55it/s]global step 180000, trans_decision ep_re -2.6295257207386866

{"global_step": 180000, "eval_re": [-1.5369020858911706, -9.915971162669287, 
14.221117629055518, -40.84789927044184, 22.9315865712978, 12.407758773742817, 
43.286987568592366, -88.72651703508885, 28.178017340785726, -6.293435536769941],
"eval_len": [74, 129, 22, 1000, 222, 258, 164, 1000, 31, 55]}

 19%|█▉        | 189997/1000000 [2:07:00<6:56:39, 32.40it/s]global step 190000, trans_decision ep_re -18.71343944359177

{"global_step": 190000, "eval_re": [40.44791175751523, -121.97729150211264, 
-12.564284293497629, -195.07001436521085, 5.372510729993237, 
-53.375879443674094, 47.64788429587433, 0.34886334165604227, 10.809472016509824,
91.22643302702889], "eval_len": [403, 1000, 30, 1000, 57, 227, 1000, 107, 1000, 
225]}

 20%|█▉        | 199997/1000000 [2:13:43<6:47:10, 32.75it/s]global step 200000, trans_decision ep_re 8.954724477126835

{"global_step": 200000, "eval_re": [-37.297347987222665, 3.5941509298504286, 
85.11317179668636, 15.16300119481555, 8.002569047205846, 32.549229995976596, 
-12.13503883456865, -19.425380488147667, 4.8587430978892785, 9.12414601878328], 
"eval_len": [1000, 187, 265, 54, 51, 348, 38, 117, 19, 104]}

 21%|██        | 209996/1000000 [2:20:40<6:45:47, 32.45it/s]global step 210000, trans_decision ep_re -4.7935676555167985

{"global_step": 210000, "eval_re": [-51.088737983816685, 36.65718539387275, 
3.4101008883248234, -3.5919959939948667, -75.25492212456555, -58.14746534875272,
28.98849876383255, 42.84643360472992, 15.796916317567803, 12.44830992763399], 
"eval_len": [1000, 25, 90, 118, 1000, 1000, 67, 45, 117, 267]}

 22%|██▏       | 219997/1000000 [2:27:30<6:36:09, 32.81it/s]global step 220000, trans_decision ep_re -26.714310064882966

{"global_step": 220000, "eval_re": [-64.86702266154559, 1.4272684454136217, 
-81.48740367604907, 1.2746261826098926, -38.859002303346585, 16.756436276061788,
-15.921413740880633, -59.38869877772115, -23.98658846003056, 
-2.0913019333413496], "eval_len": [1000, 68, 1000, 146, 190, 16, 122, 473, 49, 
25]}

 23%|██▎       | 229997/1000000 [2:34:20<6:35:22, 32.46it/s]global step 230000, trans_decision ep_re -1.9435292227217764

{"global_step": 230000, "eval_re": [41.950842754487056, 3.5348635803899544, 
1.5587665896486111, 103.95564652989933, -4.956911998369227, -42.42041153583491, 
-67.4523123165826, -115.83386004765198, 39.270860670906075, 20.95722354588994], 
"eval_len": [381, 26, 144, 591, 45, 109, 1000, 1000, 58, 28]}

 24%|██▍       | 239997/1000000 [2:40:52<6:28:31, 32.60it/s]global step 240000, trans_decision ep_re 6.863280809814308

{"global_step": 240000, "eval_re": [-21.993167268161983, 117.99964728273164, 
-0.6233006086453391, -18.469125993129, -2.0037065766376836, 4.502906333711632, 
58.33004961632807, -1.48174938565719, -32.96454767406663, -34.66419762833044], 
"eval_len": [29, 350, 48, 128, 108, 350, 240, 90, 65, 129]}

 25%|██▍       | 249996/1000000 [2:47:50<6:23:25, 32.60it/s]global step 250000, trans_decision ep_re -5.74388166312757

{"global_step": 250000, "eval_re": [43.43249109246487, -55.562748074455456, 
-3.016884629399757, -8.431572552602487, 5.195703687817593, 25.21722944621276, 
-168.79597037787127, 34.9414475990966, 1.560858957322182, 68.02062822013927], 
"eval_len": [157, 1000, 24, 9, 73, 93, 1000, 111, 54, 157]}

 26%|██▌       | 259996/1000000 [2:54:22<6:21:06, 32.36it/s]global step 260000, trans_decision ep_re 12.915099361267291

{"global_step": 260000, "eval_re": [-41.39533666348877, 73.84129480921591, 
-27.641162268722642, 50.218393242962, 5.103116183249466, 35.14720521755102, 
0.2651420491085317, 19.059590608171536, 9.265189201761132, 5.287561232864729], 
"eval_len": [385, 389, 167, 1000, 73, 48, 21, 98, 38, 31]}

 27%|██▋       | 269997/1000000 [3:01:20<6:17:10, 32.26it/s]global step 270000, trans_decision ep_re 5.523720237185306

{"global_step": 270000, "eval_re": [13.124999143774422, -118.22373765902702, 
14.074104876687626, 91.10451157973021, 9.375953865012548, 48.20369847601349, 
-19.639606757505994, 21.205029648733664, 123.25714713588626, 
-127.24489793745214], "eval_len": [191, 1000, 31, 444, 152, 148, 1000, 47, 294, 
1000]}

 28%|██▊       | 279997/1000000 [3:08:10<6:12:42, 32.20it/s]global step 280000, trans_decision ep_re 24.706392897111513

{"global_step": 280000, "eval_re": [9.353904310608623, 39.068513795249935, 
-4.595089560818983, 13.05913042626593, 4.850454568194702, 15.99446015906826, 
52.58601492312323, 4.478734888313933, 56.48813659239772, 55.77966886871178], 
"eval_len": [30, 158, 157, 335, 1000, 40, 301, 36, 301, 1000]}

 29%|██▉       | 289997/1000000 [3:15:00<6:02:03, 32.68it/s]global step 290000, trans_decision ep_re -8.974881963901392

{"global_step": 290000, "eval_re": [-103.4672484965977, -12.129685127098439, 
17.980379173539966, 77.95050203291376, -8.603038037198456, 6.35403376760377, 
22.62302684527412, 4.219690698310797, -92.3440836478278, -2.3323968479339303], 
"eval_len": [1000, 246, 77, 230, 73, 43, 326, 15, 1000, 26]}

 30%|██▉       | 299997/1000000 [3:21:50<6:00:44, 32.34it/s]global step 300000, trans_decision ep_re 9.589281022369754

{"global_step": 300000, "eval_re": [-56.364970044715974, 77.81263526431613, 
-6.721511946774537, 6.57780886254568, 29.557669480798054, 42.310142597883925, 
24.54161861621796, 31.1398932718331, -48.781374887436364, -4.17910099097044], 
"eval_len": [1000, 163, 30, 452, 148, 158, 29, 55, 1000, 39]}

 31%|███       | 309997/1000000 [3:28:40<5:54:47, 32.41it/s]global step 310000, trans_decision ep_re -4.8773220662517955

{"global_step": 310000, "eval_re": [12.620558283736946, 27.256931810327156, 
-123.07944177857519, -4.97740295485818, -4.950220735538182, 13.45300179241088, 
35.21595485181411, 20.261726390521595, -11.176862439924438, 
-13.397465882432655], "eval_len": [17, 527, 1000, 523, 227, 31, 99, 151, 537, 
85]}

 32%|███▏      | 319997/1000000 [3:35:20<5:50:42, 32.32it/s]global step 320000, trans_decision ep_re 3.3785771322110145

{"global_step": 320000, "eval_re": [15.26317771237575, 8.470919642905384, 
13.601200377386947, 88.22739380370714, -42.19158552304966, 13.004967910362327, 
-13.993214373543617, 39.65993423360073, -139.52768179614836, 51.2706593345135], 
"eval_len": [453, 51, 96, 262, 335, 122, 79, 248, 1000, 365]}

 33%|███▎      | 329997/1000000 [3:42:10<5:43:42, 32.49it/s]global step 330000, trans_decision ep_re -11.58948181458255

{"global_step": 330000, "eval_re": [70.44035152945887, 7.498027150939613, 
12.516445772451979, -50.8873007049964, 17.214921204426677, 17.99875356797568, 
-4.700634342019692, -83.92883867261584, -9.532806973436985, -92.5137366780094], 
"eval_len": [396, 52, 102, 149, 152, 86, 73, 1000, 124, 584]}

 34%|███▍      | 339997/1000000 [3:49:00<5:38:57, 32.45it/s]global step 340000, trans_decision ep_re 6.028280069188616

{"global_step": 340000, "eval_re": [-6.50471981883544, 24.942054676830647, 
85.96799113963192, 63.19617512133415, 8.523793162471229, 1.009122173194535, 
7.273812212076682, -109.49459775826352, 30.832437526901384, 
-45.463267743455404], "eval_len": [1000, 287, 194, 165, 89, 29, 71, 1000, 146, 
234]}

 35%|███▍      | 349997/1000000 [3:55:30<5:32:20, 32.60it/s]global step 350000, trans_decision ep_re 26.3985335872808

{"global_step": 350000, "eval_re": [38.85952991642859, 27.12612285037411, 
61.180168123049896, 17.870602954968412, 36.75541222808892, -8.665107593859208, 
31.599374099961874, 30.60338834756761, 10.939714303711755, 17.716130642516003], 
"eval_len": [39, 297, 144, 143, 93, 96, 47, 84, 50, 276]}

 36%|███▌      | 359999/1000000 [4:02:30<5:28:09, 32.50it/s]global step 360000, trans_decision ep_re 7.555478582696162

{"global_step": 360000, "eval_re": [3.148950304135103, 4.604968258345263, 
17.221269257309995, 23.118432104445766, 2.2791236832675965, -26.47021307005504, 
6.212453924045533, 25.468797000754893, 33.430294669761366, -13.45929030504885], 
"eval_len": [61, 32, 107, 48, 27, 1000, 26, 1000, 84, 535]}

 37%|███▋      | 369997/1000000 [4:09:10<5:24:40, 32.34it/s]global step 370000, trans_decision ep_re -22.16407831603778

{"global_step": 370000, "eval_re": [-342.27185914207297, 119.54619480062216, 
-33.58180758287503, -16.936623266095673, 4.84680171327907, -1.7560077794736886, 
3.9248758684783644, -20.077646473014813, 40.94656212354376, 23.718726577230985],
"eval_len": [1000, 350, 1000, 198, 91, 35, 58, 270, 155, 73]}

 38%|███▊      | 379997/1000000 [4:16:00<5:17:13, 32.57it/s]global step 380000, trans_decision ep_re 8.204192575762104

{"global_step": 380000, "eval_re": [8.789957884047887, 50.93522949507792, 
-0.6909250749729947, -5.225192824711447, -14.300194606088535, 
-1.6066879766042403, 19.750542838647142, 16.92146860265062, -4.151543063053696, 
11.619270482628387], "eval_len": [15, 207, 28, 188, 646, 1000, 148, 103, 188, 
33]}

 39%|███▉      | 389997/1000000 [4:22:50<5:11:46, 32.61it/s]global step 390000, trans_decision ep_re 18.799799398997344

{"global_step": 390000, "eval_re": [14.021075459444116, 39.277084549787055, 
-80.39268464554893, 6.9063541629296985, -5.187163739894856, 2.952945408148075, 
9.975609484991638, 21.837832210482766, 158.8682909039816, 19.738650195652244], 
"eval_len": [114, 1000, 1000, 50, 129, 1000, 30, 1000, 854, 331]}

 40%|███▉      | 399997/1000000 [4:29:40<5:10:25, 32.21it/s]global step 400000, trans_decision ep_re 2.679427359713789

{"global_step": 400000, "eval_re": [35.55237590568482, -76.96695802307084, 
-34.96125464140179, -26.731228557197042, 4.9999906997740595, 26.32385919440753, 
33.18363741868746, 25.30376394880876, 16.974321020353223, 23.11576663109171], 
"eval_len": [167, 1000, 154, 153, 58, 156, 104, 144, 85, 63]}

 41%|████      | 409997/1000000 [4:36:30<4:58:33, 32.94it/s]global step 410000, trans_decision ep_re 15.918671073351115

{"global_step": 410000, "eval_re": [-7.251221153828388, 90.43430442479898, 
-70.75826219265893, -14.403008005680908, 9.24604343784581, 108.38353680551913, 
19.572838241074237, 7.147627744033689, 20.752558372723925, -3.9377069403164087],
"eval_len": [26, 707, 299, 107, 32, 327, 1000, 14, 1000, 24]}

 42%|████▏     | 419997/1000000 [4:43:00<4:58:54, 32.34it/s]global step 420000, trans_decision ep_re 32.47846445205244

{"global_step": 420000, "eval_re": [55.50126527030892, 17.85036166084718, 
61.213458382792794, 28.71316921197944, 92.57962851514529, 24.47968499870214, 
-0.6241989177669924, 29.982702596728892, 9.453834313170455, 5.634738488616268], 
"eval_len": [257, 46, 277, 40, 265, 93, 44, 100, 31, 14]}

 43%|████▎     | 429996/1000000 [4:50:00<4:54:10, 32.29it/s]global step 430000, trans_decision ep_re 9.391724997587696

{"global_step": 430000, "eval_re": [14.946275656287352, 2.6021736865596, 
34.38522689447814, 11.282907775899144, -32.32928391926757, -0.7102561988990141, 
13.581004753246429, 15.183700633623811, 1.788879981259241, 33.186620712689816], 
"eval_len": [115, 31, 1000, 29, 138, 181, 213, 88, 1000, 50]}

 44%|████▍     | 439997/1000000 [4:56:40<4:47:24, 32.47it/s]global step 440000, trans_decision ep_re 18.124077867917062

{"global_step": 440000, "eval_re": [-21.49451962098056, -2.2405090365100238, 
11.253002710823868, -3.6841193220211106, 51.00702518678357, -10.193001106869184,
46.48391993218757, -1.338970848528241, 102.78386660088319, 8.664084183401568], 
"eval_len": [128, 47, 111, 87, 415, 168, 86, 301, 310, 146]}

 45%|████▍     | 449997/1000000 [5:03:11<4:43:04, 32.38it/s]global step 450000, trans_decision ep_re 4.2732746549827

{"global_step": 450000, "eval_re": [2.4068659726083483, 11.494291311234747, 
5.707506434668043, -26.611331094025235, 1.0276462273912172, -6.889012399700193, 
47.274515402062, 8.019485340084884, 8.430205237118393, -8.1274258816152], 
"eval_len": [19, 156, 157, 481, 62, 77, 75, 73, 19, 202]}

 46%|████▌     | 459996/1000000 [5:10:10<4:36:37, 32.54it/s]global step 460000, trans_decision ep_re 10.64349356842715

{"global_step": 460000, "eval_re": [-10.0058606579813, 84.95403354195591, 
-32.39657863743524, 41.72542795461056, -11.594100308664595, 53.14911526579766, 
7.705827184779561, 22.641161536342274, -40.25218474590981, -9.491905449223527], 
"eval_len": [43, 303, 297, 201, 121, 213, 120, 191, 1000, 1000]}

 47%|████▋     | 469997/1000000 [5:17:00<4:32:13, 32.45it/s]global step 470000, trans_decision ep_re 5.8056495562662525

{"global_step": 470000, "eval_re": [6.395609465419297, -31.14462977688663, 
24.506602996893566, -14.35049740135414, 0.27311397539908067, -36.80903874057855,
15.129314790565527, 72.01806065402701, -31.1945209710331, 53.23248057021046], 
"eval_len": [163, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 187]}

 48%|████▊     | 479997/1000000 [5:24:00<4:27:55, 32.35it/s]global step 480000, trans_decision ep_re -24.67338078373046

{"global_step": 480000, "eval_re": [-0.7012367293257464, 21.789019530386007, 
-65.21142791680563, -95.45863346172332, 9.54734447249377, -64.29910833925483, 
-30.404570826041432, 5.479938132228337, -5.503133507971082, 
-21.971999191290717], "eval_len": [77, 184, 259, 262, 1000, 1000, 1000, 20, 220,
244]}

 49%|████▉     | 489997/1000000 [5:30:50<4:23:53, 32.21it/s]global step 490000, trans_decision ep_re 14.217005547843637

{"global_step": 490000, "eval_re": [-0.035933400244045344, 34.92164530526151, 
10.937148688468605, -140.1328985666905, 14.614204364680564, 37.157166312369206, 
47.56786516116456, 38.14992767502544, 55.78114235131777, 43.2097875870833], 
"eval_len": [55, 79, 179, 1000, 78, 88, 238, 158, 211, 251]}

 50%|████▉     | 499997/1000000 [5:37:30<4:17:08, 32.41it/s]global step 500000, trans_decision ep_re -2.5525350303193073

{"global_step": 500000, "eval_re": [10.470386547648207, 2.973437688198436, 
25.622958843958227, 8.584510993646711, 56.870864739135875, -30.978806396849905, 
16.39052602874571, -47.09423484893188, -32.63563250684121, -35.72936139190324], 
"eval_len": [50, 128, 153, 21, 137, 1000, 59, 1000, 130, 108]}

 51%|█████     | 509997/1000000 [5:44:20<4:13:01, 32.28it/s]global step 510000, trans_decision ep_re 9.891922064638715

{"global_step": 510000, "eval_re": [26.311009748676955, 32.57512275709472, 
81.33818420477469, -43.1528916534033, -54.32505095560418, 42.294104436118275, 
-0.7778461123199609, 14.856666151413444, 3.5046723818438386, 
-3.7047503122073167], "eval_len": [300, 262, 302, 1000, 246, 1000, 464, 61, 563,
31]}

 52%|█████▏    | 519997/1000000 [5:51:10<4:07:30, 32.32it/s]global step 520000, trans_decision ep_re 0.9341199546416579

{"global_step": 520000, "eval_re": [5.289096448983135, 49.510100325480586, 
-52.25471621229139, 11.292375143064861, 31.465981277228693, -8.479380299164827, 
-28.051646491993736, 2.966301272467337, -0.25082542806299846, 
-2.14608648929508], "eval_len": [557, 259, 501, 141, 79, 23, 603, 20, 276, 36]}

 53%|█████▎    | 529997/1000000 [5:58:00<4:03:50, 32.13it/s]global step 530000, trans_decision ep_re 9.849584257208601

{"global_step": 530000, "eval_re": [20.242942839212038, -122.47897493486855, 
94.55719034996609, -26.786998219340404, 54.278768906115104, 20.129193085336603, 
84.67484023933518, -4.214872149195181, -28.785282843389187, 6.8790352989142995],
"eval_len": [138, 1000, 415, 170, 276, 534, 340, 166, 383, 8]}

 54%|█████▍    | 539997/1000000 [6:04:40<3:55:44, 32.52it/s]global step 540000, trans_decision ep_re -8.388922537167796

{"global_step": 540000, "eval_re": [-7.747696273959851, -23.833253149559887, 
5.027378682112072, -5.799410326739125, 1.2581117890338616, 8.71430692513442, 
25.70459466757351, 43.8434620644196, -43.104796268815086, -87.95192348087747], 
"eval_len": [60, 69, 89, 176, 291, 73, 219, 141, 1000, 1000]}

 55%|█████▍    | 549997/1000000 [6:11:16<3:50:50, 32.49it/s]global step 550000, trans_decision ep_re 14.757349818922984

{"global_step": 550000, "eval_re": [77.26179079939637, 41.52341588861643, 
-4.987779777436049, -24.40625796945531, 29.487170605647343, -48.78159205245244, 
38.324110381929266, 9.584914241321398, 5.240096191755848, 24.327629879906986], 
"eval_len": [256, 84, 24, 49, 32, 209, 80, 238, 153, 174]}

 56%|█████▌    | 559999/1000000 [6:18:10<3:43:09, 32.86it/s]global step 560000, trans_decision ep_re -5.171995002132532

{"global_step": 560000, "eval_re": [-3.624710943642398, -14.58503212750944, 
35.36396809524016, -3.372106252060762, -21.704953066604354, -53.027985063846074,
-19.107787480014338, -39.48654439551626, 34.2675903400692, 33.557610872558946], 
"eval_len": [69, 29, 240, 21, 64, 204, 398, 1000, 49, 162]}

 57%|█████▋    | 569997/1000000 [6:25:00<3:42:06, 32.27it/s]global step 570000, trans_decision ep_re -8.665175679127694

{"global_step": 570000, "eval_re": [43.64720242977661, 10.204818828003052, 
5.331789188047326, -16.177032750118723, -48.17107337332212, 25.46184907415916, 
16.3220645322162, 19.93583880767235, -15.050699338606034, -128.15651418910477], 
"eval_len": [275, 195, 12, 82, 1000, 190, 471, 153, 32, 1000]}

 58%|█████▊    | 579997/1000000 [6:31:35<3:33:58, 32.71it/s]global step 580000, trans_decision ep_re 3.990977789246249

{"global_step": 580000, "eval_re": [-70.69264521352008, -30.293472377662766, 
72.763784203611, 55.50562689361324, 24.2716628859419, 17.341060596293445, 
-16.417610242964052, -3.6366864385324296, -2.3281146002337634, 
-6.603827814084008], "eval_len": [1000, 51, 277, 135, 39, 20, 197, 43, 78, 67]}

 59%|█████▉    | 589999/1000000 [6:38:30<3:28:11, 32.82it/s]global step 590000, trans_decision ep_re -25.1549598072073

{"global_step": 590000, "eval_re": [-0.6324706874517249, -5.802232714076837, 
13.872132870608473, -101.63923169613368, -1.5508491317592963, 
-37.640635387226794, -6.757268472510906, -47.943296547670364, 
-24.441623836829745, -39.014122469022155], "eval_len": [107, 100, 39, 1000, 625,
1000, 330, 283, 146, 77]}

 60%|█████▉    | 599997/1000000 [6:45:20<3:25:26, 32.45it/s]global step 600000, trans_decision ep_re -11.374337841490172

{"global_step": 600000, "eval_re": [-2.912094440121728, 9.981884609802215, 
14.108568671778785, -77.24700620657575, -106.87529923403115, 20.443369646538844,
-0.00546156082042637, 32.383977663951335, -0.06354698529128422, 
-3.557770580132568], "eval_len": [20, 226, 78, 1000, 327, 63, 30, 52, 74, 80]}

 61%|██████    | 609997/1000000 [6:52:10<3:17:40, 32.88it/s]global step 610000, trans_decision ep_re -21.484497309945187

{"global_step": 610000, "eval_re": [7.7253302434830164, 45.291765134113575, 
-18.75851597861273, -62.16439715464897, 63.798743543971774, -9.552771359162254, 
-170.91332549611315, 5.048944562541424, -67.57894052229535, -7.741806072729221],
"eval_len": [197, 1000, 255, 1000, 318, 102, 825, 187, 1000, 48]}

 62%|██████▏   | 619997/1000000 [6:59:00<3:14:50, 32.50it/s]global step 620000, trans_decision ep_re -8.975837570059577

{"global_step": 620000, "eval_re": [30.88459923525411, 53.59761258086613, 
15.513589469783385, -15.120639733002767, -41.15942973522126, 3.512845783668169, 
-75.56648104976871, -31.96938549380885, 16.336743693593213, 
-45.787830451959195], "eval_len": [43, 152, 1000, 97, 246, 9, 1000, 223, 184, 
209]}

 63%|██████▎   | 629999/1000000 [7:05:50<3:10:58, 32.29it/s]global step 630000, trans_decision ep_re -7.98984311562657

{"global_step": 630000, "eval_re": [14.348505470706318, 25.738549025896155, 
4.444199611221615, 52.40702277176958, -76.10234304075072, -12.33526806505062, 
-74.59077162812629, 8.17861400378363, 26.563663022945683, -48.55060232866106], 
"eval_len": [36, 146, 19, 318, 1000, 97, 1000, 25, 134, 48]}

 64%|██████▍   | 639997/1000000 [7:12:21<3:04:29, 32.52it/s]global step 640000, trans_decision ep_re 7.345085458799568

{"global_step": 640000, "eval_re": [-10.724949450224193, 23.418802972188185, 
2.968250625135979, -0.8914446048348471, -7.1855216432081255, 13.904913589073283,
6.634343100396161, 14.558276674189232, -30.196065757613763, 60.964249082893765],
"eval_len": [1000, 1000, 128, 87, 21, 23, 87, 26, 163, 118]}

 65%|██████▍   | 649996/1000000 [7:19:20<2:59:25, 32.51it/s]global step 650000, trans_decision ep_re -9.383793405788671

{"global_step": 650000, "eval_re": [50.63193567580724, 22.691741804749057, 
-112.78471854858564, 25.712182600323665, -8.045143678454929, 58.95334539351585, 
13.366081871499533, -94.21263440658244, -52.7586033892132, 2.6078786190541594], 
"eval_len": [211, 139, 1000, 79, 26, 180, 74, 307, 1000, 22]}

 66%|██████▌   | 659997/1000000 [7:26:10<2:54:39, 32.44it/s]global step 660000, trans_decision ep_re -11.430238789894382

{"global_step": 660000, "eval_re": [-8.3762550805704, -11.752660353621707, 
27.51845520774871, -50.50655764007781, 9.378511025575538, -12.010580188951689, 
-0.07278985157003692, -56.61177736772064, -60.70330924132686, 
48.834575591571095], "eval_len": [59, 49, 171, 477, 139, 64, 113, 1000, 290, 
779]}

 67%|██████▋   | 669997/1000000 [7:32:40<2:49:50, 32.38it/s]global step 670000, trans_decision ep_re -0.29314944533943255

{"global_step": 670000, "eval_re": [25.494729956051707, -12.561620263282405, 
8.457488219571548, 18.271100961799352, -28.506769246568513, -12.605802108563052,
-34.21995736365712, 30.303093693390203, -21.202883071346132, 
23.639124769210092], "eval_len": [37, 66, 144, 65, 642, 438, 150, 219, 1000, 
164]}

 68%|██████▊   | 679999/1000000 [7:39:40<2:44:15, 32.47it/s]global step 680000, trans_decision ep_re 11.19595446425819

{"global_step": 680000, "eval_re": [28.99906525941294, -36.240148851744294, 
12.444896748810626, -8.211260835641212, 11.83812468005135, 94.04713060280704, 
-25.555601121566994, -12.878806135189794, 2.933952775507236, 
44.582191520135005], "eval_len": [90, 173, 21, 354, 33, 1000, 107, 38, 17, 276]}

 69%|██████▉   | 689997/1000000 [7:46:20<2:36:48, 32.95it/s]global step 690000, trans_decision ep_re -12.848880822105674

{"global_step": 690000, "eval_re": [76.3563090775952, -39.168026100519, 
-7.10523498465616, 0.5038203155897445, -0.9407432263472122, -51.93332987228562, 
-10.598881628785636, -78.92253827061616, -14.807137119661132, 
-1.8730464113707577], "eval_len": [248, 74, 161, 102, 23, 1000, 21, 197, 69, 
112]}

 70%|██████▉   | 699997/1000000 [7:53:10<2:35:38, 32.12it/s]global step 700000, trans_decision ep_re -18.146791809595495

{"global_step": 700000, "eval_re": [3.9680872286618047, -19.481715787502196, 
-2.5061452242618083, -9.876095568804876, 31.433315947451216, 
-108.90066400399917, -87.4065043546675, 13.935652255279411, -19.426606508487218,
16.79275792037538], "eval_len": [259, 63, 133, 56, 645, 332, 1000, 1000, 41, 
116]}

 71%|███████   | 709997/1000000 [7:59:50<2:27:15, 32.82it/s]global step 710000, trans_decision ep_re -27.68585161741072

{"global_step": 710000, "eval_re": [-6.807620381883942, -75.02283658501149, 
-8.503331488175938, -10.87297668906441, -68.26614355183622, -10.229374157959597,
-37.153492502306115, 2.2093400191405195, -79.95847033994791, 
17.746389502937934], "eval_len": [85, 608, 59, 265, 1000, 1000, 89, 110, 1000, 
100]}

 72%|███████▏  | 719997/1000000 [8:06:40<2:21:35, 32.96it/s]global step 720000, trans_decision ep_re -20.688052572098297

{"global_step": 720000, "eval_re": [-14.893453526356115, -163.9857161665704, 
-27.867200413677878, -16.31601484051077, -2.6205242336577816, 
-9.164651334471039, -129.88834723263713, 107.72990020652094, 8.660208290997392, 
41.46527352937982], "eval_len": [83, 389, 1000, 115, 17, 26, 409, 1000, 19, 
1000]}

 73%|███████▎  | 729997/1000000 [8:13:30<2:16:50, 32.89it/s]global step 730000, trans_decision ep_re -13.343829093728413

{"global_step": 730000, "eval_re": [0.3706940814191282, 17.286181222068166, 
-4.420898519896108, -11.261646383796563, -7.581395203949984, -4.265071543788924,
-70.05839369692026, -28.178670678930725, 12.752439095991358, 
-38.081529309480246], "eval_len": [46, 233, 67, 23, 13, 166, 1000, 94, 168, 74]}

 74%|███████▍  | 739999/1000000 [8:20:10<2:11:42, 32.90it/s]global step 740000, trans_decision ep_re -21.25732761885074

{"global_step": 740000, "eval_re": [-6.429635230302869, 8.965533352544941, 
2.5960066160927084, 3.5719633864822344, 19.675812199802113, 1.3761657646558503, 
-222.7414258178737, -31.863843503254213, 2.7027614703138, 9.57338557303173], 
"eval_len": [201, 21, 23, 57, 22, 376, 655, 209, 1000, 626]}

 75%|███████▍  | 749997/1000000 [8:26:42<2:10:29, 31.93it/s]global step 750000, trans_decision ep_re -44.39039509969985

{"global_step": 750000, "eval_re": [16.239266414634848, 13.371670734069326, 
23.224138740692705, 3.3164089117610343, -25.974557293549218, -544.9008014063786,
-33.4894201746093, 93.36173028190746, -2.488503561103135, 13.436116355576349], 
"eval_len": [43, 25, 230, 12, 112, 1000, 469, 258, 35, 261]}

 76%|███████▌  | 759999/1000000 [8:33:40<2:01:49, 32.83it/s]global step 760000, trans_decision ep_re -47.00655376949774

{"global_step": 760000, "eval_re": [-85.13953204313503, 3.2486660045954125, 
19.594756717046558, -12.366239530949645, -103.6847710346234, -45.89920537921684,
-39.96549055835422, -34.91148844270779, -111.16866409359103, 
-59.77356933404144], "eval_len": [1000, 111, 696, 61, 1000, 1000, 1000, 82, 
1000, 512]}

 77%|███████▋  | 769997/1000000 [8:40:40<1:56:31, 32.90it/s]global step 770000, trans_decision ep_re -99.10711052710583

{"global_step": 770000, "eval_re": [-496.4003408117712, 35.9742071189191, 
-56.869013182498435, 38.65375569090679, -70.88688347382593, -13.05621930114464, 
-11.897325228400469, -132.0587662030477, -258.4488314905391, 
-26.081688389656883], "eval_len": [1000, 1000, 1000, 87, 332, 168, 73, 1000, 
1000, 381]}

 78%|███████▊  | 779997/1000000 [8:47:30<1:51:33, 32.87it/s]global step 780000, trans_decision ep_re -6.185879918338803

{"global_step": 780000, "eval_re": [-91.22168694275985, -26.08366066999887, 
-2.2603037091561187, -9.127497373033947, 72.76224783326072, 9.104207035113607, 
-17.684640791736726, -2.558116188584239, 21.773421740416865, 
-16.562770116909476], "eval_len": [1000, 1000, 67, 195, 444, 13, 1000, 246, 98, 
1000]}

 79%|███████▉  | 789997/1000000 [8:54:20<1:46:40, 32.81it/s]global step 790000, trans_decision ep_re -35.11519630676491

{"global_step": 790000, "eval_re": [-69.72628694739076, 0.6818923585043157, 
7.305650173641489, -42.6988957798221, -164.12343457425663, -29.21648948043183, 
2.0463712253223756, -30.67695359109013, -25.44731081880674, 0.7034943666809059],
"eval_len": [665, 155, 129, 877, 1000, 105, 28, 38, 1000, 85]}

 80%|███████▉  | 799997/1000000 [9:01:10<1:41:10, 32.95it/s]global step 800000, trans_decision ep_re -16.31739275265527

{"global_step": 800000, "eval_re": [-61.41979338630305, 2.616169639755798, 
45.57231498480304, 1.837789794965445, 5.422121983545651, 6.124126023575246, 
-12.80641163896159, 31.245368017911844, -68.25493212888559, 
-113.51068081695948], "eval_len": [280, 1000, 83, 25, 245, 39, 53, 49, 1000, 
1000]}

 81%|████████  | 809997/1000000 [9:07:42<1:36:23, 32.85it/s]global step 810000, trans_decision ep_re -1.3625355246607616

{"global_step": 810000, "eval_re": [32.04803917897406, 29.443139915640767, 
-14.951187768911556, -32.29953999292972, 11.976762275780866, -1.816949293845585,
17.079936576846368, -34.021271981402, -43.671547972951494, 22.58726381619069], 
"eval_len": [87, 100, 564, 308, 32, 35, 56, 212, 106, 36]}

 82%|████████▏ | 819996/1000000 [9:14:21<1:31:16, 32.87it/s]global step 820000, trans_decision ep_re -10.583616741444468

{"global_step": 820000, "eval_re": [4.200437863029139, 13.110270176944336, 
15.153477752185577, -10.2293739697338, -4.023030119926378, -37.327417900893735, 
14.432868027183263, 24.976938228656966, -75.6206690135901, -50.50966845829994], 
"eval_len": [19, 283, 25, 35, 46, 288, 42, 191, 1000, 1000]}

 83%|████████▎ | 829999/1000000 [9:21:20<1:26:02, 32.93it/s]global step 830000, trans_decision ep_re -31.25679109330639

{"global_step": 830000, "eval_re": [70.47135775258147, -238.40969109744307, 
3.3336115439000085, 10.782355774529305, 47.89129192487539, -102.37068608168505, 
22.38646921979445, -81.54421877971345, 37.058581574438435, -82.16698276434144], 
"eval_len": [387, 719, 216, 164, 59, 1000, 53, 1000, 132, 1000]}

 84%|████████▍ | 839997/1000000 [9:28:10<1:21:12, 32.84it/s]global step 840000, trans_decision ep_re -20.99253485922186

{"global_step": 840000, "eval_re": [-25.530544146156984, 21.824933121568908, 
-69.71256936079655, -9.192432688879366, -61.94481731178401, -73.81364481059623, 
33.610448716951346, -71.13734087212372, 15.79113590901028, 30.179482850587735], 
"eval_len": [409, 51, 511, 296, 1000, 1000, 71, 1000, 18, 118]}

 85%|████████▍ | 849997/1000000 [9:35:00<1:16:03, 32.87it/s]global step 850000, trans_decision ep_re -13.511419488770182

{"global_step": 850000, "eval_re": [2.8714383397366094, -47.46926149399376, 
5.275282784014457, -73.48804237750396, 66.05222322897497, -11.856717099532501, 
26.98037651872693, -134.69069501033854, 32.23357421984493, -1.0223739976309396],
"eval_len": [26, 1000, 52, 1000, 223, 1000, 80, 753, 73, 17]}

 86%|████████▌ | 859997/1000000 [9:41:50<1:10:47, 32.96it/s]global step 860000, trans_decision ep_re 2.0835298215651354

{"global_step": 860000, "eval_re": [-35.107691086968586, 7.270098168161347, 
13.286436588019367, 56.924876839072816, -42.336165756331994, 6.3160412753042845,
-32.95517409718529, -21.642943464777737, 42.292166488606995, 26.78765326175016],
"eval_len": [1000, 52, 26, 139, 219, 73, 826, 309, 271, 94]}

 87%|████████▋ | 869997/1000000 [9:48:30<1:05:52, 32.89it/s]global step 870000, trans_decision ep_re -25.197805932102078

{"global_step": 870000, "eval_re": [35.98515652581486, -3.4749872855715465, 
-9.476986163328165, 17.159172671900823, 7.960251338303747, -249.69012609553585, 
-118.43418741054654, 12.828743884366535, -23.639772829628317, 
78.80467604320367], "eval_len": [108, 47, 46, 31, 78, 752, 1000, 73, 98, 400]}

 88%|████████▊ | 879997/1000000 [9:55:02<1:00:57, 32.81it/s]global step 880000, trans_decision ep_re -0.8123621684360796

{"global_step": 880000, "eval_re": [9.996329616437468, 2.9247880265165325, 
73.45897688262966, -12.166702940922674, 0.12299109830279475, 
-117.17013086332805, -19.658688351082958, -5.917557093212929, 32.20341608624887,
28.082955854050507], "eval_len": [325, 64, 318, 127, 124, 1000, 34, 352, 106, 
364]}

 89%|████████▉ | 889999/1000000 [10:02:00<55:43, 32.90it/s]global step 890000, trans_decision ep_re -19.145635300868925

{"global_step": 890000, "eval_re": [-19.08755276839223, 28.988624605624302, 
-22.8363900625763, -39.340001405298295, 6.20191073931124, 2.94838337172481, 
-2.797436213543227, -48.85566141481622, -4.211540077894961, -92.46668978282834],
"eval_len": [659, 319, 66, 1000, 28, 13, 30, 1000, 63, 264]}

 90%|████████▉ | 899997/1000000 [10:08:31<50:37, 32.92it/s]global step 900000, trans_decision ep_re -8.786868769999844

{"global_step": 900000, "eval_re": [-43.49362492533349, 47.88413221101414, 
-33.357308395730115, -0.8094594190009945, 42.99054895852349, -4.93655761725185, 
-24.832506790843976, 6.358839108602416, 14.333169613209233, -92.00592044318728],
"eval_len": [125, 57, 373, 89, 110, 13, 194, 53, 67, 1000]}

 91%|█████████ | 909999/1000000 [10:15:30<45:39, 32.85it/s]global step 910000, trans_decision ep_re -26.45181441658432

{"global_step": 910000, "eval_re": [-78.12722723910908, 5.298981594763098, 
13.46655426256168, 25.992068228827414, -4.336558575873174, 11.480197039114634, 
43.60019106996714, -0.2656296180084976, 3.807189715691684, -285.43391064377806],
"eval_len": [1000, 64, 193, 334, 25, 22, 369, 251, 65, 1000]}

 92%|█████████▏| 919997/1000000 [10:22:10<40:31, 32.90it/s]global step 920000, trans_decision ep_re -1.3267729784937725

{"global_step": 920000, "eval_re": [51.84395993017989, -51.704364635122374, 
18.817432165112475, 31.020562840722526, -12.925718821725415, 2.9629787873482574,
-53.69449071375177, 17.728868074024344, -3.0671662130397266, 
-14.24979119868593], "eval_len": [177, 1000, 24, 30, 30, 128, 1000, 107, 48, 
114]}

 93%|█████████▎| 929997/1000000 [10:29:00<35:32, 32.83it/s]global step 930000, trans_decision ep_re -32.240924049807475

{"global_step": 930000, "eval_re": [-18.879258576801305, -17.87896084245011, 
7.3354870143492645, -33.53998528680899, 47.71745364476485, 1.1792653009946443, 
-147.223607255076, 32.232710686724054, -169.22952285073927, -24.12282233303185],
"eval_len": [422, 43, 1000, 1000, 186, 148, 1000, 112, 1000, 89]}

 94%|█████████▍| 939997/1000000 [10:35:50<30:25, 32.87it/s]global step 940000, trans_decision ep_re -32.50842882637936

{"global_step": 940000, "eval_re": [-23.47721693119527, 9.871914231664459, 
-115.775204729629, 13.647973869269826, -118.72742445591253, -7.480658039175119, 
-2.2739435516964663, -12.640868128735107, -44.29672464236689, 
-23.932135886017463], "eval_len": [30, 253, 197, 44, 1000, 1000, 135, 184, 1000,
724]}

 95%|█████████▍| 949997/1000000 [10:42:40<25:21, 32.86it/s]global step 950000, trans_decision ep_re -12.200736232982258

{"global_step": 950000, "eval_re": [16.57339987609255, -34.846569601511, 
2.208513504515088, 47.96350592331726, -17.68159252682426, -139.35988640318655, 
-5.646448630575701, 9.062200689279733, -16.43412844921873, 16.153643288289015], 
"eval_len": [35, 64, 417, 1000, 83, 1000, 186, 35, 208, 139]}

 96%|█████████▌| 959998/1000000 [10:49:20<20:17, 32.84it/s]global step 960000, trans_decision ep_re -19.73498028084121

{"global_step": 960000, "eval_re": [40.26647091310377, -203.25572062556387, 
-144.1887202927474, 23.860245211578874, 48.21734606557519, 56.32092583688053, 
3.2126244653564635, -63.75895426219613, -23.01064106478633, 64.98662094438673], 
"eval_len": [107, 1000, 1000, 359, 865, 142, 26, 405, 1000, 586]}

 97%|█████████▋| 969997/1000000 [10:56:10<15:12, 32.87it/s]global step 970000, trans_decision ep_re -8.09077369112739

{"global_step": 970000, "eval_re": [2.652666347773558, -18.29365516855579, 
-28.497873843230792, -36.07427811567747, -3.1894582320976115, -8.8120040779627, 
37.67401166530401, -23.447268019926007, 2.8209705880478424, -5.740848054948936],
"eval_len": [32, 75, 1000, 1000, 45, 169, 98, 109, 1000, 69]}

 98%|█████████▊| 979997/1000000 [11:03:00<10:09, 32.84it/s]global step 980000, trans_decision ep_re -6.210797420326299

{"global_step": 980000, "eval_re": [-141.7316556346369, 12.173581059908894, 
52.04224086976259, 17.06273473308011, -76.02825519969684, -39.39233862540325, 
59.670100271818434, 54.06537079685266, 8.292126791192745, -8.26187926614142], 
"eval_len": [1000, 49, 256, 160, 1000, 82, 377, 464, 219, 45]}

 99%|█████████▉| 989997/1000000 [11:09:50<05:04, 32.87it/s]global step 990000, trans_decision ep_re -3.2455184810145368

{"global_step": 990000, "eval_re": [63.75106072969506, -32.726435562454704, 
82.20787095894977, -58.410081208415704, -160.14918073104167, 26.180738317115477,
-108.07921600047314, 84.738043266455, 98.23747833124905, -28.20546291122451], 
"eval_len": [225, 251, 316, 1000, 1000, 145, 1000, 622, 316, 101]}

100%|█████████▉| 999998/1000000 [11:16:40<00:00, 32.79it/s]global step 1000000, trans_decision ep_re -31.590645725123913

{"global_step": 1000000, "eval_re": [-4.94110172885346, -10.109029581700682, 
-24.949004835891888, -19.981141734035145, 36.71961903636236, 
-109.43551573234245, 30.76354716433771, -21.944298233865865, 
-53.126394499122775, -138.90313710612696], "eval_len": [249, 390, 40, 42, 58, 
1000, 118, 56, 1000, 663]}

100%|██████████| 1000000/1000000 [11:16:45<00:00, 24.63it/s]
