
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.2
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9997/1000000 [03:24<8:35:34, 32.00it/s]global step 10000, trans_decision ep_re 157.5063133069059

{"global_step": 10000, "eval_re": [1.5872093164845236, 3.276133826042868, 
7.635535844714941, 26.03002366609681, 135.97270162326433, 344.6873395094558, 
314.0145146947163, 10.236993020700119, 370.6749971165799, 360.94768445100306], 
"eval_len": [12, 17, 19, 48, 82, 234, 190, 19, 254, 213]}

  2%|▏         | 19999/1000000 [10:15<8:25:50, 32.29it/s]global step 20000, trans_decision ep_re 66.48637749904273

{"global_step": 20000, "eval_re": [10.037987603143039, 13.432832379577562, 
17.383660919063463, 7.051633801151969, 3.3727292031517946, 207.80211821823463, 
4.102077175499479, 41.74746853591762, 77.23387737700614, 282.6993897776816], 
"eval_len": [31, 32, 38, 52, 16, 126, 15, 59, 132, 165]}

  3%|▎         | 29999/1000000 [17:06<8:31:41, 31.59it/s]global step 30000, trans_decision ep_re 135.58413386142678

{"global_step": 30000, "eval_re": [200.58830633010047, 3.5363640183109273, 
70.68554618789781, 218.6764822532297, 564.1287051689094, 143.80612546840246, 
5.161883806715872, -1.6912739442417686, 145.89686905135122, 5.052330273591642], 
"eval_len": [107, 24, 80, 117, 505, 87, 17, 10, 93, 19]}

  4%|▍         | 39997/1000000 [23:57<8:35:18, 31.05it/s]global step 40000, trans_decision ep_re 11.571895389530349

{"global_step": 40000, "eval_re": [6.817461172825563, 7.65128531828821, 
16.0958059188101, 1.214306448666492, 6.893531072903728, 52.76608587335387, 
5.137227602738248, 3.9336212944170814, 8.013895184076063, 7.195734009224125], 
"eval_len": [31, 21, 39, 14, 33, 95, 31, 14, 19, 18]}

  5%|▍         | 49997/1000000 [30:45<8:15:50, 31.93it/s]global step 50000, trans_decision ep_re 45.76685138251766

{"global_step": 50000, "eval_re": [0.30061538991557785, 10.021242532265354, 
0.11640425442547833, 5.385680915456992, 4.307035874153862, 181.4560655403266, 
13.374075614566166, 186.8772945034135, 7.458716124495407, 48.37138307615758], 
"eval_len": [14, 34, 13, 15, 14, 91, 32, 99, 21, 47]}

  6%|▌         | 59997/1000000 [37:34<8:14:15, 31.70it/s]global step 60000, trans_decision ep_re 16.789215939356616

{"global_step": 60000, "eval_re": [5.4130486627601035, 6.302764899723228, 
3.2847631946303246, 6.634534156155342, 3.061149985005799, 108.45225487692112, 
12.95864528408481, 7.78715903654488, 12.334486729613634, 1.6633525681268948], 
"eval_len": [17, 19, 18, 17, 15, 121, 25, 19, 26, 19]}

  7%|▋         | 69997/1000000 [44:22<8:20:25, 30.97it/s]global step 70000, trans_decision ep_re 30.873250557603836

{"global_step": 70000, "eval_re": [7.416046633646652, 13.805432211655882, 
-1.230864448913976, 2.0144238477847134, 5.5379343296982935, 6.11746566929491, 
11.644937561214899, 11.652125429543618, 234.03122312319837, 17.74378121891499], 
"eval_len": [19, 30, 11, 12, 23, 21, 26, 28, 122, 35]}

  8%|▊         | 79997/1000000 [51:10<8:00:36, 31.90it/s]global step 80000, trans_decision ep_re 249.01330228462925

{"global_step": 80000, "eval_re": [264.0956233678026, 3.978037789562546, 
0.8035533750077268, 993.5762112491662, 1.018879988738668, 3.9364377757221174, 
-0.9352036054262807, 5.88160549826404, 1058.0858089718438, 159.69206843561096], 
"eval_len": [199, 14, 14, 455, 21, 19, 16, 17, 535, 102]}

  9%|▉         | 89999/1000000 [58:02<7:57:22, 31.77it/s]global step 90000, trans_decision ep_re 60.17610065872784

{"global_step": 90000, "eval_re": [6.625823977632074, 11.57343801097333, 
194.57851296444022, 1.6579709657191124, 2.237123536783, 368.36170118912423, 
20.761220623413745, -4.269328824086943, 2.2205078403091827, 
-1.9859636970295487], "eval_len": [18, 24, 165, 14, 24, 231, 48, 21, 21, 14]}

 10%|▉         | 99999/1000000 [1:04:52<7:45:53, 32.20it/s]global step 100000, trans_decision ep_re 54.16009402129208

{"global_step": 100000, "eval_re": [5.566170643109187, 7.995828944276361, 
34.59111465747987, 1.0362118098444344, 7.064869817493193, 191.60433527196594, 
5.403893652050272, 3.4096900669507764, 278.91637790506786, 6.012447444682919], 
"eval_len": [27, 24, 44, 16, 29, 143, 15, 15, 148, 19]}

 11%|█         | 109999/1000000 [1:11:42<7:53:17, 31.34it/s]global step 110000, trans_decision ep_re 51.43638406720139

{"global_step": 110000, "eval_re": [3.344675484224354, 2.5689230246720713, 
317.9795060061827, 2.833397369771356, 3.184793804191665, 2.641177578262809, 
150.674439760182, 3.039314243089647, 23.219392582549993, 4.87822081888726], 
"eval_len": [19, 19, 233, 13, 14, 22, 107, 19, 35, 18]}

 12%|█▏        | 119999/1000000 [1:18:31<7:52:08, 31.06it/s]global step 120000, trans_decision ep_re 222.47807463909834

{"global_step": 120000, "eval_re": [0.36673354532649927, 545.8684165630572, 
114.78901594586686, -0.9955145233339693, 2.265810038965537, 3.032375444179733, 
867.1814035217772, 222.7550521213285, 443.5186058278337, 25.998847905982508], 
"eval_len": [14, 261, 88, 19, 16, 17, 322, 141, 276, 47]}

 13%|█▎        | 129997/1000000 [1:25:23<7:43:19, 31.30it/s]global step 130000, trans_decision ep_re 306.7409522300097

{"global_step": 130000, "eval_re": [1283.84380487803, 4.217691229419733, 
455.70511575650465, -0.06031447389767047, 2.769733924519979, 284.37552946569957,
-2.0466509639690744, 107.11986533236636, 925.1972927785348, 6.287454372889202], 
"eval_len": [441, 18, 185, 25, 18, 130, 29, 89, 390, 20]}

 14%|█▍        | 139999/1000000 [1:32:15<7:32:17, 31.69it/s]global step 140000, trans_decision ep_re 93.73299687265829

{"global_step": 140000, "eval_re": [194.34479570381342, 165.18562035783327, 
8.49538547837577, 1.8125843238273176, 6.020079375325098, 37.1914261104693, 
3.2676887339220166, 2.735611679335192, 8.358178992195976, 509.91859797148555], 
"eval_len": [108, 104, 19, 12, 19, 48, 16, 14, 33, 202]}

 15%|█▍        | 149997/1000000 [1:39:05<7:39:11, 30.85it/s]global step 150000, trans_decision ep_re 162.75919354920362

{"global_step": 150000, "eval_re": [374.71727032472353, 204.3726722978853, 
0.5465158621586443, 663.4513287590133, 0.24485210450186362, 1.2653823390640193, 
28.658620860653375, -0.030767832937102002, 0.3706995524346248, 
353.9953612245384], "eval_len": [165, 150, 27, 238, 10, 14, 60, 13, 18, 147]}

 16%|█▌        | 159999/1000000 [1:45:55<7:17:55, 31.97it/s]global step 160000, trans_decision ep_re 387.2057590838581

{"global_step": 160000, "eval_re": [587.8864582108093, 2.960111655687544, 
907.0441522155728, 1.7681991211481676, 747.7124742947381, -0.39728910323055994, 
5.196910350629172, 451.82655614493973, 104.36896803638027, 1063.691049911906], 
"eval_len": [205, 14, 322, 26, 261, 16, 20, 239, 81, 374]}

 17%|█▋        | 169997/1000000 [1:53:00<7:10:58, 32.10it/s]global step 170000, trans_decision ep_re 337.8524823269932

{"global_step": 170000, "eval_re": [1.9290957002039864, 1.939642422223422, 
642.6115610771196, 703.426533035534, 310.12692993905, 3.827546190498098, 
862.8098272414402, 315.5254848241344, 3.459462794313654, 532.868740045414], 
"eval_len": [13, 13, 222, 279, 206, 14, 390, 159, 17, 200]}

 18%|█▊        | 179997/1000000 [1:59:50<7:13:56, 31.49it/s]global step 180000, trans_decision ep_re 144.4893489495401

{"global_step": 180000, "eval_re": [630.0512638351731, 5.008409718792473, 
46.662150316391816, -1.8098461864677506, 0.5907145481437821, 254.50933905988143,
45.04968244781686, 49.141951832718505, -1.4186373341244707, 417.1084612570755], 
"eval_len": [246, 30, 60, 26, 27, 164, 79, 63, 12, 180]}

 19%|█▉        | 189997/1000000 [2:06:30<6:57:42, 32.32it/s]global step 190000, trans_decision ep_re 389.3558299472189

{"global_step": 190000, "eval_re": [1303.869608868421, 384.0782120013869, 
5.4652077612348124, 551.5732579372329, 655.1964578812544, 736.1392425434628, 
-1.121788207712425, 3.1605098433391405, 212.104330909327, 43.09325993424208], 
"eval_len": [454, 175, 18, 190, 231, 275, 13, 17, 104, 86]}

 20%|█▉        | 199999/1000000 [2:13:22<7:06:20, 31.27it/s]global step 200000, trans_decision ep_re 442.85634008032764

{"global_step": 200000, "eval_re": [464.78801286891365, 871.2807664821393, 
2.7529419612838644, 758.8059411865189, 212.84891673984455, 558.065737698581, 
783.6534993000198, 1.2209272115381946, -0.15418593121263088, 775.30084328565], 
"eval_len": [183, 339, 30, 277, 121, 210, 305, 13, 15, 316]}

 21%|██        | 209997/1000000 [2:20:30<6:56:56, 31.58it/s]global step 210000, trans_decision ep_re 352.9318738274524

{"global_step": 210000, "eval_re": [1780.462128114792, 7.2900804807733826, 
1112.987276984814, 36.13319838037719, -0.630201344134337, 15.30154405679766, 
551.5254264925, 0.48169024382268205, 23.47197150392068, 2.2956233608612386], 
"eval_len": [713, 17, 394, 55, 25, 39, 221, 29, 47, 15]}

 22%|██▏       | 219997/1000000 [2:27:20<6:47:11, 31.93it/s]global step 220000, trans_decision ep_re 301.18206109564136

{"global_step": 220000, "eval_re": [885.7237235839655, 0.8873069432201387, 
816.7686840254008, 781.0046325589359, 213.40301458400043, 3.405966536328119, 
29.94842419943666, 1.9380407486070836, 201.7678967617749, 76.97292101474402], 
"eval_len": [328, 16, 319, 312, 115, 14, 55, 12, 113, 93]}

 23%|██▎       | 229997/1000000 [2:34:10<6:36:37, 32.36it/s]global step 230000, trans_decision ep_re 278.88518816912585

{"global_step": 230000, "eval_re": [7.307888920720017, 2.3599660554838575, 
5.779115703190286, 4.693925695448225, 330.7250065623041, 2.2602125284549963, 
1226.8233222885015, 0.25378661664890856, 5.899103510353207, 1202.7495538101532],
"eval_len": [19, 17, 30, 30, 145, 17, 423, 16, 16, 396]}

 24%|██▍       | 239997/1000000 [2:40:52<6:30:49, 32.41it/s]global step 240000, trans_decision ep_re 247.7311748341091

{"global_step": 240000, "eval_re": [9.755887788122172, 3.5196128202739057, 
236.07554553480958, 4.29009226935692, 363.60963968523026, 11.749469875482628, 
170.74666203591045, 801.4336838906394, 873.3771544404945, 2.754000000770986], 
"eval_len": [35, 16, 111, 21, 219, 37, 166, 311, 343, 13]}

 25%|██▍       | 249999/1000000 [2:47:44<6:28:43, 32.16it/s]global step 250000, trans_decision ep_re 465.03942368403995

{"global_step": 250000, "eval_re": [698.6556479001225, 6.36523743033761, 
30.442775634554557, 954.6407313329397, 732.2225972870069, 405.7562799418458, 
5.268788382078772, 378.44658340055935, 892.0854889814459, 546.5101065495079], 
"eval_len": [296, 17, 119, 347, 244, 157, 16, 157, 314, 208]}

 26%|██▌       | 259997/1000000 [2:54:50<6:30:13, 31.61it/s]global step 260000, trans_decision ep_re 450.59123732592616

{"global_step": 260000, "eval_re": [-2.9224720493486553, 189.4927109650799, 
1012.3367048922003, 2642.3085911475173, 5.711200752536574, -1.2008167896618256, 
188.33618900650757, 467.87587705148286, 5.14699123439724, -1.1726029514494132], 
"eval_len": [26, 121, 416, 961, 19, 33, 125, 200, 17, 29]}

 27%|██▋       | 269997/1000000 [3:01:31<6:20:20, 31.99it/s]global step 270000, trans_decision ep_re 732.7473364359473

{"global_step": 270000, "eval_re": [1428.4666964896664, 420.12996426768615, 
771.5459059678494, -0.28489138378907664, 271.21355866678016, 831.15342265916, 
-1.327241485844584, 5.53753854397371, 2921.271458318809, 679.7669523151816], 
"eval_len": [464, 247, 257, 10, 130, 335, 14, 16, 1000, 251]}

 28%|██▊       | 279997/1000000 [3:08:40<6:24:21, 31.22it/s]global step 280000, trans_decision ep_re 557.2256789108113

{"global_step": 280000, "eval_re": [958.7042185757172, 11.911792296341206, 
0.4481058151075627, 3.1443443015175747, 6.634997763619297, 824.8044529414173, 
1210.2383363421775, 1637.7044383165648, 3.270993169478553, 915.3951095861721], 
"eval_len": [350, 36, 26, 14, 19, 295, 447, 537, 14, 305]}

 29%|██▉       | 289997/1000000 [3:15:20<6:16:18, 31.45it/s]global step 290000, trans_decision ep_re 711.202304195463

{"global_step": 290000, "eval_re": [835.486594031919, 1412.6506092365307, 
1.244753772974512, 695.3001075712235, 1241.7262823876292, 2.6656713675542623, 
824.5043920915765, 230.97555551256715, 790.0583230501347, 1077.410752932519], 
"eval_len": [286, 531, 12, 283, 444, 19, 279, 227, 263, 375]}

 30%|██▉       | 299999/1000000 [3:22:14<6:04:51, 31.98it/s]global step 300000, trans_decision ep_re 270.66342351544813

{"global_step": 300000, "eval_re": [1156.8651615429533, 604.4967785566291, 
332.41805776205774, 3.520055321634519, 2.4087519278303757, 1.5839041582923241, 
7.147310616538259, 2.179394547762018, 596.7120452891627, -0.6972245683785409], 
"eval_len": [415, 319, 171, 18, 13, 12, 17, 26, 236, 12]}

 31%|███       | 309997/1000000 [3:29:20<5:56:42, 32.24it/s]global step 310000, trans_decision ep_re 759.1821318723105

{"global_step": 310000, "eval_re": [2323.0265023189995, 102.96373761618607, 
5.152798059619751, 5.811645779019901, 259.4896329544115, 1346.120463525544, 
5.036104781143495, 1622.7025473589167, 1917.7611369400358, 3.75674938922817], 
"eval_len": [879, 125, 15, 19, 144, 547, 20, 566, 650, 18]}

 32%|███▏      | 319997/1000000 [3:36:10<5:56:22, 31.80it/s]global step 320000, trans_decision ep_re 427.6960927898091

{"global_step": 320000, "eval_re": [0.3390366518443516, 1516.086958903087, 
750.0855896842249, 280.6181456441668, 647.4950813233295, 4.985520516977448, 
169.29084060160343, 91.93184737679809, 5.135922180464879, 810.9919850155942], 
"eval_len": [29, 522, 272, 181, 220, 37, 187, 84, 18, 322]}

 33%|███▎      | 329997/1000000 [3:42:43<5:41:57, 32.66it/s]global step 330000, trans_decision ep_re 142.60930065017595

{"global_step": 330000, "eval_re": [6.808525988793466, 598.4508544234934, 
4.874486544717861, 1.4673338177311406, 0.25298835781710416, 4.064831027385533, 
1.618802900330853, 1.2231441240668417, 5.808216770673904, 801.5238225467497], 
"eval_len": [19, 225, 19, 12, 13, 18, 18, 19, 31, 283]}

 34%|███▍      | 339999/1000000 [3:49:40<5:38:16, 32.52it/s]global step 340000, trans_decision ep_re 682.4659947193721

{"global_step": 340000, "eval_re": [1260.8198212519012, 2499.928570563724, 
697.9459583432384, 2.818855289915251, 8.609316198642382, 4.403373758339346, 
878.1606368051174, -0.24562131190586362, 1474.7969425330773, 
-2.577906238327886], "eval_len": [454, 822, 278, 16, 18, 18, 320, 17, 509, 29]}

 35%|███▍      | 349997/1000000 [3:56:30<5:43:42, 31.52it/s]global step 350000, trans_decision ep_re 747.3262063436151

{"global_step": 350000, "eval_re": [1830.7345628438327, 51.188184774804206, 
1371.7757364905694, 503.98533054230063, 1586.131524687916, 7.151238511398695, 
5.599783577895509, 875.9850637916048, 1214.5307783977182, 26.179859818111034], 
"eval_len": [547, 85, 422, 214, 478, 19, 18, 294, 348, 49]}

 36%|███▌      | 359997/1000000 [4:03:05<5:38:30, 31.51it/s]global step 360000, trans_decision ep_re 423.002055848153

{"global_step": 360000, "eval_re": [-0.5488705625580519, 1764.0353531275343, 
4.623200040637681, 1245.5122514166435, -1.4845419857862192, 6.304244335408579, 
0.734844551004354, 4.1468212056521185, 10.685866382896426, 1196.0113899700968], 
"eval_len": [15, 540, 33, 394, 11, 30, 20, 26, 35, 386]}

 37%|███▋      | 369997/1000000 [4:09:51<5:21:55, 32.62it/s]global step 370000, trans_decision ep_re 621.14706926725

{"global_step": 370000, "eval_re": [1648.0861211439985, 498.2190982027833, 
-0.3667582535864432, 0.22523160678119725, 2.4368789190874733, 525.9593929951566,
226.9226023943616, 1927.9003290420098, 384.3097633551947, 997.7780332667129], 
"eval_len": [518, 187, 18, 29, 28, 263, 106, 565, 194, 364]}

 38%|███▊      | 379999/1000000 [4:16:41<5:14:12, 32.89it/s]global step 380000, trans_decision ep_re 637.6570745081187

{"global_step": 380000, "eval_re": [5.498743356669508, -0.6073359817221687, 
534.4645899355676, 2874.841452196079, 0.2831582253338844, 2.423988065026921, 
1255.7469699396754, 1696.2970092200308, 1.818485707941698, 5.803684416584284], 
"eval_len": [31, 14, 204, 823, 28, 16, 438, 551, 17, 19]}

 39%|███▉      | 389997/1000000 [4:23:40<5:15:51, 32.19it/s]global step 390000, trans_decision ep_re 463.82225602440695

{"global_step": 390000, "eval_re": [1539.3772674163968, 5.908481163389518, 
32.76463721871305, -1.2522005922720536, 386.2249975723749, 5.229523632767399, 
4.468304754882254, 1546.6053307979785, 919.8575349801561, 199.03868329968307], 
"eval_len": [523, 17, 53, 14, 169, 32, 19, 475, 316, 188]}

 40%|███▉      | 399997/1000000 [4:30:30<5:24:41, 30.80it/s]global step 400000, trans_decision ep_re 414.1143839680775

{"global_step": 400000, "eval_re": [1109.358801538061, 134.0326641582008, 
16.396251332394254, 4.434201017576843, 33.99516509984021, 2197.868360934594, 
1.9119505627298397, 33.79152051398491, 1.3179865616323112, 608.0369379617614], 
"eval_len": [336, 98, 31, 15, 49, 673, 16, 70, 33, 229]}

 41%|████      | 409997/1000000 [4:37:20<5:17:57, 30.93it/s]global step 410000, trans_decision ep_re 617.0010394167409

{"global_step": 410000, "eval_re": [1012.2490536532989, 0.051257135770101464, 
29.00086089887091, 15.863439433516234, 44.63199087935877, 0.4394999411996421, 
1388.648827087725, 1870.4553398895014, 1808.8440795206066, 
-0.17395427243710615], "eval_len": [340, 16, 51, 49, 65, 11, 408, 587, 535, 16]}

 42%|████▏     | 419997/1000000 [4:44:05<5:00:21, 32.18it/s]global step 420000, trans_decision ep_re 120.73723535801795

{"global_step": 420000, "eval_re": [9.65846544363284, 4.235844218090978, 
93.35508124706938, 20.569891060060264, 28.013623872444413, -3.067425698692583, 
0.1150637117385869, 76.69187360670503, 791.2791057340227, 186.52083038510796], 
"eval_len": [36, 18, 94, 42, 44, 25, 24, 80, 302, 84]}

 43%|████▎     | 429999/1000000 [4:51:10<5:00:08, 31.65it/s]global step 430000, trans_decision ep_re 563.0469427522221

{"global_step": 430000, "eval_re": [3.843850920402474, 220.2078503875978, 
-0.47435087096222345, 1653.5952794616037, 1785.0712549763641, 8.511624877411034,
260.4684814671108, 2.099950247726028, 0.20686046913488687, 1696.9386255858328], 
"eval_len": [14, 141, 16, 547, 549, 33, 123, 17, 17, 529]}

 44%|████▍     | 439999/1000000 [4:57:52<4:59:56, 31.12it/s]global step 440000, trans_decision ep_re 346.288843531076

{"global_step": 440000, "eval_re": [6.626099255766106, 4.295116324771664, 
8.493262427977712, 5.511738263974855, 4.9111253263786, 3414.338169880176, 
0.7098704649740519, 5.968962483369538, 1.9223273751704328, 10.111763508201548], 
"eval_len": [17, 16, 17, 15, 18, 1000, 14, 19, 13, 35]}

 45%|████▍     | 449997/1000000 [5:04:43<4:50:14, 31.58it/s]global step 450000, trans_decision ep_re 414.89950734958575

{"global_step": 450000, "eval_re": [25.083112781814204, 1313.9409123880635, 
1.7354516271971396, 4.89344317625515, 107.35793054376069, 1.0702338887115046, 
0.21634359763494185, 7.220647117721387, 1988.0818551799523, 699.3951431947468], 
"eval_len": [47, 473, 25, 15, 114, 17, 16, 31, 528, 333]}

 46%|████▌     | 459999/1000000 [5:11:50<4:40:53, 32.04it/s]global step 460000, trans_decision ep_re 605.5803092755892

{"global_step": 460000, "eval_re": [3259.53440381218, 2.4444115203703323, 
635.5384608041596, 12.422449558527392, 1431.2814186889311, 4.53282845285393, 
0.09407338487872816, 676.003275159982, 2.9224752738280433, 31.029296100180378], 
"eval_len": [973, 19, 249, 39, 473, 17, 15, 264, 15, 57]}

 47%|████▋     | 469997/1000000 [5:18:30<4:31:00, 32.60it/s]global step 470000, trans_decision ep_re 265.09543798910147

{"global_step": 470000, "eval_re": [27.52765535312058, 2.896787365377572, 
1404.0173929125276, 5.032098520295519, 172.45684001103416, 4.039435527127355, 
9.253559591113275, 964.7706272991968, 0.16449083163967132, 60.795492479582116], 
"eval_len": [109, 15, 429, 16, 102, 13, 20, 314, 14, 80]}

 48%|████▊     | 479999/1000000 [5:25:22<4:37:33, 31.22it/s]global step 480000, trans_decision ep_re 320.6758231612703

{"global_step": 480000, "eval_re": [470.5506521122317, 576.8521242749756, 
5.327533368917596, 9.333066303584015, 42.14522007939598, 0.14591688739683195, 
374.7016184981061, 411.2701578682159, 5.818848790251602, 1310.6130934296277], 
"eval_len": [218, 234, 17, 32, 53, 18, 155, 163, 30, 440]}

 49%|████▉     | 489997/1000000 [5:32:30<4:33:21, 31.10it/s]global step 490000, trans_decision ep_re 386.63857023366404

{"global_step": 490000, "eval_re": [1.5171294378588194, -0.5416934355742571, 
3.7624275758070995, 748.6913202902401, 229.60919605032592, 1.2830356556963727, 
28.874182428675137, 2403.7592673925387, 421.99366217618865, 27.437174764883686],
"eval_len": [17, 11, 16, 338, 126, 14, 48, 715, 165, 53]}

 50%|████▉     | 499997/1000000 [5:39:10<4:24:34, 31.50it/s]global step 500000, trans_decision ep_re 463.0017906267388

{"global_step": 500000, "eval_re": [492.37748581324104, 4.301463715302684, 
2.317687629275879, 3014.4267822820134, 436.12317581153167, 4.404628712108556, 
658.1303606084622, 9.355707844350654, 6.716831657541123, 1.8637821935607648], 
"eval_len": [210, 32, 12, 1000, 182, 29, 222, 18, 16, 14]}

 51%|█████     | 509999/1000000 [5:46:20<4:19:35, 31.46it/s]global step 510000, trans_decision ep_re 452.8853067489584

{"global_step": 510000, "eval_re": [2073.792986921138, 9.578583588249256, 
5.483243353835144, 4.277443820641613, 1162.8193217235219, 4.909460869625807, 
1242.2794169665028, 17.63733814824676, 6.631122137583532, 1.444149960238673], 
"eval_len": [632, 36, 17, 15, 640, 16, 381, 39, 34, 13]}

 52%|█████▏    | 519997/1000000 [5:53:10<4:15:07, 31.36it/s]global step 520000, trans_decision ep_re 439.6292248453095

{"global_step": 520000, "eval_re": [881.8738361493639, 3.193016424183562, 
2706.6607266581627, 717.7414510457683, 62.3973069572722, 17.5672120794101, 
3.9429229012517024, 1.1279815495448773, -2.016729494291933, 3.8045241824298226],
"eval_len": [324, 16, 811, 303, 60, 40, 14, 35, 14, 17]}

 53%|█████▎    | 529997/1000000 [6:00:10<4:06:51, 31.73it/s]global step 530000, trans_decision ep_re 1227.538587689457

{"global_step": 530000, "eval_re": [3336.0822893000477, 6.075397521945427, 
5.037216683384827, 974.4009991993993, 0.31634577274178177, 2308.8593694964406, 
1126.562385005559, 13.811014697492151, 3008.5489504209304, 1495.6919087966294], 
"eval_len": [1000, 27, 18, 336, 13, 701, 464, 37, 862, 448]}

 54%|█████▍    | 539997/1000000 [6:06:54<3:59:37, 31.99it/s]global step 540000, trans_decision ep_re 139.00853271793176

{"global_step": 540000, "eval_re": [1.8402438249401931, 38.02565590958189, 
-0.28041622230061297, 1247.9759835358557, 5.848578384678103, 1.8666631299370364,
7.711826112769375, 15.348172500626724, 8.084719145153112, 63.66390085807595], 
"eval_len": [16, 47, 14, 429, 20, 17, 32, 37, 36, 216]}

 55%|█████▍    | 549999/1000000 [6:14:00<4:02:17, 30.95it/s]global step 550000, trans_decision ep_re 316.2853640608536

{"global_step": 550000, "eval_re": [3.7796230437838836, 381.1274035601181, 
10.734275099244062, 672.6295189009742, 978.1046218639358, 2.4387940782374167, 
3.7702704326623815, 11.982229343454346, 1089.758544677952, 8.528359608174133], 
"eval_len": [19, 184, 31, 265, 545, 16, 16, 39, 353, 30]}

 56%|█████▌    | 559997/1000000 [6:20:45<3:55:06, 31.19it/s]global step 560000, trans_decision ep_re 337.17718473493926

{"global_step": 560000, "eval_re": [196.77260430421546, 4.395748338241912, 
27.644013419024844, 3.26923020019223, 3.228859807599805, 7.403549333943428, 
1580.1475345537945, 2.4332426166814876, 12.439155288568516, 1534.0379094871303],
"eval_len": [124, 18, 139, 16, 16, 32, 495, 17, 39, 443]}

 57%|█████▋    | 569997/1000000 [6:27:41<3:48:12, 31.40it/s]global step 570000, trans_decision ep_re 635.5266084448136

{"global_step": 570000, "eval_re": [8.070136540567391, 3.6597419966385023, 
1491.6462681240973, 1303.0201264795066, 1.0655553895655407, -0.280251120053498, 
0.21486241394599592, 2.955578999333154, 770.0002734395275, 2774.913792185008], 
"eval_len": [34, 16, 492, 450, 15, 29, 15, 13, 339, 838]}

 58%|█████▊    | 579999/1000000 [6:34:50<3:39:52, 31.84it/s]global step 580000, trans_decision ep_re 602.3794447810716

{"global_step": 580000, "eval_re": [595.3564635668073, 932.8798400405824, 
166.67923732495402, 4.282074725067489, 2.503489967678761, 1026.139913973629, 
4.365249909448143, 1566.1519550293754, 1567.5074801972137, 157.92874307596043], 
"eval_len": [261, 319, 164, 14, 17, 350, 16, 496, 507, 78]}

 59%|█████▉    | 589999/1000000 [6:41:34<3:34:08, 31.91it/s]global step 590000, trans_decision ep_re 143.0537626305334

{"global_step": 590000, "eval_re": [1262.4922738106532, 6.989225274295776, 
3.544237812003202, 6.313837103361282, 0.6477136298310859, 1.3416399657735605, 
134.63619209524856, 3.0719037224758896, 3.407391562129268, 8.09321132956226], 
"eval_len": [402, 127, 15, 17, 18, 11, 102, 19, 14, 18]}

 60%|█████▉    | 599999/1000000 [6:48:40<3:34:38, 31.06it/s]global step 600000, trans_decision ep_re 456.60002187530927

{"global_step": 600000, "eval_re": [530.0967070720912, 3.5623166090873997, 
2.159218723775612, 1.8141901171445638, 369.0621196868616, 2151.8316846999655, 
6.377320864428975, 1491.0537074020133, 5.610589886798755, 4.432363690925577], 
"eval_len": [191, 18, 15, 17, 185, 583, 18, 437, 19, 19]}

 61%|██████    | 609997/1000000 [6:55:22<3:27:36, 31.31it/s]global step 610000, trans_decision ep_re 511.4783170620282

{"global_step": 610000, "eval_re": [0.9485396322273483, 1324.1267826839755, 
8.109648073347731, 167.56240578106267, 6.076347021794672, 123.69008650997043, 
1622.5039034215579, 166.94542874248026, 1690.3008154196818, 4.51921333418378], 
"eval_len": [11, 386, 18, 107, 17, 259, 452, 297, 472, 18]}

 62%|██████▏   | 619999/1000000 [7:02:21<3:25:02, 30.89it/s]global step 620000, trans_decision ep_re 102.50635126347738

{"global_step": 620000, "eval_re": [4.948692155356171, 10.86146290814238, 
-0.9193137495018233, 3.30502323380892, 36.161008265482025, 702.125506539616, 
3.728697486580833, 3.757261783345013, 194.57058955712273, 66.52458445482151], 
"eval_len": [15, 34, 15, 29, 45, 291, 29, 17, 119, 122]}

 63%|██████▎   | 629997/1000000 [7:09:16<3:18:01, 31.14it/s]global step 630000, trans_decision ep_re 163.89242723941865

{"global_step": 630000, "eval_re": [384.27727682378776, 0.34674395018177395, 
5.676764463771405, 132.30293214672662, -0.18847543891896695, 5.02182988774548, 
117.74017306540095, 21.937469322139307, 543.7582096938305, 428.051348479522], 
"eval_len": [195, 12, 18, 114, 26, 34, 102, 43, 230, 214]}

 64%|██████▍   | 639999/1000000 [7:16:11<3:08:34, 31.82it/s]global step 640000, trans_decision ep_re 154.30537949413798

{"global_step": 640000, "eval_re": [319.528738066795, 2.6912308216146714, 
4.31039502919046, 3.3139193242018874, 530.6228497212745, 1.5017539250851637, 
632.456321706873, 1.3955841003193716, 43.232750069819076, 4.00025217620677], 
"eval_len": [160, 17, 15, 19, 211, 13, 251, 14, 53, 17]}

 65%|██████▍   | 649997/1000000 [7:23:05<3:05:37, 31.43it/s]global step 650000, trans_decision ep_re 375.98690932816436

{"global_step": 650000, "eval_re": [997.4037978433238, 79.57360772928263, 
1239.6471212652089, 30.981770569755604, 3.3456077258007015, 1044.6283676397181, 
1.3571777556525106, 3.1362108317188633, 0.21985036222408844, 359.5755815589587],
"eval_len": [295, 93, 457, 45, 18, 453, 11, 15, 19, 140]}

 66%|██████▌   | 659997/1000000 [7:30:04<3:02:24, 31.07it/s]global step 660000, trans_decision ep_re 147.41491850102736

{"global_step": 660000, "eval_re": [6.718561887279189, 2.0191769886604085, 
23.494966406172434, 0.8248521755509759, 276.6695667246632, 2.4910260183632205, 
13.511954412719124, 12.032729259483236, 1126.1117811117426, 10.274570025639147],
"eval_len": [16, 28, 38, 26, 139, 18, 36, 37, 361, 34]}

 67%|██████▋   | 669999/1000000 [7:37:10<2:57:30, 30.99it/s]global step 670000, trans_decision ep_re 149.4028374683088

{"global_step": 670000, "eval_re": [1443.1901761144097, 5.635230189052602, 
-1.5749657828023604, 0.8943298244260036, 6.128523350292118, 1.9814313499953264, 
3.668387083084804, 38.27691769528474, -0.9615418350507596, -3.210113305604237], 
"eval_len": [460, 36, 33, 29, 33, 16, 26, 101, 17, 25]}

 68%|██████▊   | 679997/1000000 [7:43:50<2:51:29, 31.10it/s]global step 680000, trans_decision ep_re 7.119720891776329

{"global_step": 680000, "eval_re": [1.7367926266959706, 2.7656238721883826, 
24.517397427797714, 0.8266357196099813, 33.70863504248093, 4.227655819667467, 
-4.21233259808834, 3.656386512454398, 5.491693753516423, -1.5212792585596495], 
"eval_len": [27, 27, 79, 13, 46, 30, 24, 14, 15, 28]}

 69%|██████▉   | 689997/1000000 [7:50:40<2:44:49, 31.35it/s]global step 690000, trans_decision ep_re 249.48964796164105

{"global_step": 690000, "eval_re": [3.3103148332461174, 1.1360088598628686, 
0.6328184027699135, 192.14199828921232, 125.81637967239013, 5.944879599167786, 
1895.5490332312854, 101.03632232183057, 8.905249247845028, 160.42347515880047], 
"eval_len": [17, 18, 14, 102, 111, 19, 568, 116, 34, 94]}

 70%|██████▉   | 699999/1000000 [7:57:33<2:39:08, 31.42it/s]global step 700000, trans_decision ep_re 611.2850122155271

{"global_step": 700000, "eval_re": [6.571210196412327, 21.05682603858168, 
1083.3922521939803, 7.155528181290144, 688.396148731568, 190.0103522095254, 
36.012378640796626, 1958.0985585834933, 594.3636271890155, 1527.793240190607], 
"eval_len": [17, 48, 347, 19, 269, 120, 73, 535, 251, 435]}

 71%|███████   | 709999/1000000 [8:04:31<2:33:20, 31.52it/s]global step 710000, trans_decision ep_re 244.2260855690069

{"global_step": 710000, "eval_re": [491.2148802563893, 576.220577345826, 
37.02535591931341, 21.538281006096515, 5.985222586245899, 611.7334657135708, 
9.236136168576232, 659.9394729777251, 23.276318883137595, 6.09114483318866], 
"eval_len": [196, 232, 51, 38, 30, 277, 35, 252, 44, 35]}

 72%|███████▏  | 719997/1000000 [8:11:40<2:26:57, 31.76it/s]global step 720000, trans_decision ep_re 497.8993691360969

{"global_step": 720000, "eval_re": [1829.374103936956, 227.22517250635778, 
533.4877237316017, 1326.8167544703394, 3.037688139286288, 6.529919786769641, 
4.40730193278343, 1044.1820379447718, 0.8014382826518233, 3.131550629450877], 
"eval_len": [511, 206, 226, 404, 17, 19, 17, 326, 25, 31]}

 73%|███████▎  | 729997/1000000 [8:18:25<2:22:42, 31.53it/s]global step 730000, trans_decision ep_re 275.39683626004035

{"global_step": 730000, "eval_re": [-0.379718169235853, 7.151572262063826, 
8.597042259935252, 465.20876076198647, 1187.1835051226094, 5.391616738326147, 
2.1423409963091986, 1072.0817538250556, 3.8107437750663715, 2.7807450282868205],
"eval_len": [13, 32, 29, 204, 390, 31, 30, 335, 19, 15]}

 74%|███████▍  | 739999/1000000 [8:25:21<2:18:31, 31.28it/s]global step 740000, trans_decision ep_re 270.1191391494352

{"global_step": 740000, "eval_re": [2.774492697149575, 24.11028583911314, 
116.41415648378822, 384.75780000038026, 7.093695434656303, 2.254567246483012, 
1116.9582213946428, -1.731108728611088, 7.930010483997088, 1040.6292706427532], 
"eval_len": [20, 145, 95, 246, 19, 17, 358, 12, 35, 348]}

 75%|███████▍  | 749999/1000000 [8:32:30<2:11:33, 31.67it/s]global step 750000, trans_decision ep_re 274.6357771483318

{"global_step": 750000, "eval_re": [161.69589550652424, 4.900897706909781, 
24.7932920514166, 49.74625191212471, 892.5949545058538, 1.5565188131433938, 
91.42864605862152, 507.3258061695022, 1009.2029525253395, 3.112556233881943], 
"eval_len": [94, 17, 43, 148, 265, 14, 181, 203, 358, 18]}

 76%|███████▌  | 759997/1000000 [8:39:13<2:04:26, 32.15it/s]global step 760000, trans_decision ep_re 179.99113258088877

{"global_step": 760000, "eval_re": [74.07853795277653, 5.247389945581311, 
306.068146878448, 5.516630313207532, 1314.7277984745679, 46.85678860576694, 
16.20370978774315, 1.9375254953920455, 22.492277529278475, 6.782520826125774], 
"eval_len": [68, 18, 168, 31, 406, 174, 46, 28, 41, 19]}

 77%|███████▋  | 769999/1000000 [8:46:20<2:03:40, 30.99it/s]global step 770000, trans_decision ep_re 589.2179879374643

{"global_step": 770000, "eval_re": [653.8524737647169, 12.017256097618452, 
870.7487407128807, 1553.291405980121, 0.6685849288505568, 693.8559704213274, 
991.7768402883218, 1107.2417165790116, 6.441488019365718, 2.2854025824293314], 
"eval_len": [224, 36, 373, 462, 14, 244, 330, 412, 33, 12]}

 78%|███████▊  | 779997/1000000 [8:53:20<1:56:04, 31.59it/s]global step 780000, trans_decision ep_re 251.1097087475765

{"global_step": 780000, "eval_re": [9.248004219946147, 0.8955149675633521, 
3.681066636368362, 1.8367006996498916, 184.77595547354133, 419.8055691285085, 
1381.2822545912713, 4.583033835442209, 0.3135845842172589, 504.67540333925666], 
"eval_len": [30, 34, 17, 12, 114, 159, 411, 34, 26, 184]}

 79%|███████▉  | 789997/1000000 [9:00:02<1:49:16, 32.03it/s]global step 790000, trans_decision ep_re 175.7434103918496

{"global_step": 790000, "eval_re": [31.224893106842973, 1316.1081156576727, 
7.022591673117506, 6.45431579747826, 6.497199424961312, 195.706465092759, 
-2.536676626389213, 2.2867231948512377, 180.04617860462037, 14.624297992582177],
"eval_len": [50, 381, 17, 17, 21, 285, 16, 18, 95, 40]}

 80%|███████▉  | 799999/1000000 [9:07:10<1:44:09, 32.00it/s]global step 800000, trans_decision ep_re 970.5974587052178

{"global_step": 800000, "eval_re": [4.422147849114909, 2344.4266636118714, 
148.68940768967457, 1020.176936579312, 3357.251754638458, 28.735336672102836, 
930.4811285234343, 37.948423682061765, 3.4444217211448813, 1830.3983660850035], 
"eval_len": [16, 616, 149, 305, 895, 70, 318, 49, 12, 475]}

 81%|████████  | 809997/1000000 [9:14:10<1:40:39, 31.46it/s]global step 810000, trans_decision ep_re 630.7979273797064

{"global_step": 810000, "eval_re": [2192.01422748234, 168.39346772028628, 
1249.888218255013, 253.17112210647858, 542.957668111611, 1126.8994176901533, 
0.26514970324261633, 2.286094269005334, 768.8816436896491, 3.2222647692850095], 
"eval_len": [536, 87, 358, 151, 199, 387, 12, 12, 276, 28]}

 82%|████████▏ | 819997/1000000 [9:21:10<1:33:47, 31.99it/s]global step 820000, trans_decision ep_re 590.1236414667819

{"global_step": 820000, "eval_re": [710.3128156309325, 0.6211555020445616, 
91.10875545856072, 35.24171785048268, -2.1643501560348954, 222.91264300018022, 
2558.225995728189, -2.2154911954383376, 2221.6782133637753, 65.51495948512684], 
"eval_len": [398, 12, 124, 47, 16, 170, 708, 23, 769, 77]}

 83%|████████▎ | 829997/1000000 [9:27:57<1:29:45, 31.57it/s]global step 830000, trans_decision ep_re 217.76093525349842

{"global_step": 830000, "eval_re": [2.4193215132844204, 2054.2456471007586, 
2.517854457622007, 78.88710812983457, 15.238607983867015, 5.632886005611821, 
15.697947268940041, 2.46249443349325, 4.676644278382348, -4.1691586368102955], 
"eval_len": [15, 529, 33, 55, 36, 31, 34, 13, 18, 16]}

 84%|████████▍ | 839999/1000000 [9:34:52<1:26:01, 31.00it/s]global step 840000, trans_decision ep_re 292.37456451180634

{"global_step": 840000, "eval_re": [2.286037487752762, 445.17547983015334, 
-1.0598942937235445, 1.3352468742134802, 139.77312254153574, 
-2.2777233705929154, 483.9234446550798, 1224.8573712027965, 604.3008145474865, 
25.431745643361765], "eval_len": [15, 178, 27, 18, 138, 25, 206, 384, 213, 70]}

 85%|████████▍ | 849997/1000000 [9:42:00<1:18:34, 31.81it/s]global step 850000, trans_decision ep_re 633.8606238633377

{"global_step": 850000, "eval_re": [46.63624233507379, 513.489885911129, 
71.3775759268411, 13.115091741357702, 328.6741597069611, 327.73687280781536, 
3.9849867371554692, 3251.7082193696774, 7.2253444911194284, 1774.6578596062461],
"eval_len": [51, 277, 59, 30, 166, 144, 18, 908, 32, 532]}

 86%|████████▌ | 859997/1000000 [9:49:00<1:13:27, 31.76it/s]global step 860000, trans_decision ep_re 363.14122653586037

{"global_step": 860000, "eval_re": [257.0258863869205, -1.2251950548787747, 
2.5124502217397113, 1928.2711107373937, 323.1460651306624, 
-0.003527073932788505, -0.1997287832720026, 238.38974805346942, 
879.7774770166736, 3.717978723827206], "eval_len": [129, 15, 18, 626, 150, 11, 
11, 107, 283, 16]}

 87%|████████▋ | 869997/1000000 [9:55:43<1:07:36, 32.04it/s]global step 870000, trans_decision ep_re 249.206572977333

{"global_step": 870000, "eval_re": [2.122858561074807, 5.763141924920244, 
293.98485385166816, 1238.720649776006, 1.7992233976759606, 718.3511951417706, 
205.53466271572566, 12.138605831914091, 4.839655254112364, 8.810883318461848], 
"eval_len": [27, 16, 181, 388, 33, 237, 102, 32, 14, 31]}

 88%|████████▊ | 879999/1000000 [10:02:50<1:04:14, 31.13it/s]global step 880000, trans_decision ep_re 881.796779285982

{"global_step": 880000, "eval_re": [3905.3168914638327, 8.45705606448957, 
0.6316574230181891, 6.164078719607193, 744.8461796474487, 4.8762995404597005, 
1569.2436311609285, 2572.473445136205, 4.480837788769759, 1.477715915062392], 
"eval_len": [1000, 30, 15, 18, 237, 18, 452, 727, 15, 14]}

 89%|████████▉ | 889997/1000000 [10:09:50<57:49, 31.71it/s]global step 890000, trans_decision ep_re 184.47077086158166

{"global_step": 890000, "eval_re": [1276.6639283184327, 47.75778029911421, 
135.5198361141487, 4.162427054813257, 0.77238836973636, 4.973904553011003, 
244.0736054712498, 4.747783687109671, 84.55819337292894, 41.477861375272006], 
"eval_len": [396, 55, 104, 15, 16, 17, 125, 20, 60, 56]}

 90%|████████▉ | 899997/1000000 [10:16:33<53:32, 31.13it/s]global step 900000, trans_decision ep_re 232.9620053780233

{"global_step": 900000, "eval_re": [3.3716645529304463, 3.6475581986401666, 
139.05108602534503, 1061.9035012321006, 0.8577446988057568, 3.3419010318983946, 
3.793864943349551, -2.465076993086743, 1064.6425738988758, 51.475236191374044], 
"eval_len": [29, 15, 95, 330, 18, 29, 17, 17, 356, 56]}

 91%|█████████ | 909999/1000000 [10:23:28<47:08, 31.82it/s]global step 910000, trans_decision ep_re 9.492190751838555

{"global_step": 910000, "eval_re": [5.412263817527705, 2.1134045716065826, 
5.873386950815195, 3.980417621701291, 44.98045941806808, 7.997463595579576, 
10.397786469890004, 11.574875317960117, 1.4974433551019555, 1.0944064001350544],
"eval_len": [18, 12, 31, 30, 54, 18, 33, 31, 16, 12]}

 92%|█████████▏| 919999/1000000 [10:30:30<41:57, 31.77it/s]global step 920000, trans_decision ep_re 482.6232239027023

{"global_step": 920000, "eval_re": [160.4574037508598, 4.150033519975892, 
2.9321877754901804, 1.5502517392329749, -0.8667913026625438, 969.3972569865181, 
4.915173665340388, 1404.0183941670493, 2278.110719663415, 1.5676090618040008], 
"eval_len": [105, 14, 14, 14, 29, 285, 20, 447, 654, 14]}

 93%|█████████▎| 929997/1000000 [10:37:17<36:33, 31.91it/s]global step 930000, trans_decision ep_re 352.53270226084044

{"global_step": 930000, "eval_re": [813.696619689765, 1827.343293684858, 
17.636721896503825, 10.645263288199892, 3.4795122943173253, 827.3758873894567, 
16.750229916662477, 4.84961819508314, 1.4528124693021929, 2.097063784255654], 
"eval_len": [315, 548, 37, 33, 17, 273, 37, 14, 16, 18]}

 94%|█████████▍| 939999/1000000 [10:44:13<32:12, 31.05it/s]global step 940000, trans_decision ep_re 363.53496235559106

{"global_step": 940000, "eval_re": [-0.8587253417791516, 858.6171774475995, 
16.084293732528558, 1369.3292201761044, 565.4236580651673, 4.181567921469584, 
0.6764795400346191, 808.5179369538498, 8.11809525582029, 5.259919805115963], 
"eval_len": [26, 333, 69, 400, 215, 20, 14, 270, 19, 20]}

 95%|█████████▍| 949997/1000000 [10:51:20<25:51, 32.22it/s]global step 950000, trans_decision ep_re 494.00894335992626

{"global_step": 950000, "eval_re": [424.6320597457166, 3.5644356328192646, 
1609.9917738381357, 2.9256643429922264, 1189.7604785920184, 189.76384678872037, 
1416.6022479094672, 4.27526781662152, 89.59626648465112, 8.97739244812038], 
"eval_len": [177, 17, 484, 27, 358, 103, 406, 15, 165, 92]}

 96%|█████████▌| 959997/1000000 [10:58:06<21:09, 31.51it/s]global step 960000, trans_decision ep_re 128.77560639545038

{"global_step": 960000, "eval_re": [0.274433545359278, 1.0006173521323303, 
9.965977703903997, 4.25943432544147, 6.86217802906147, 3.975331066629008, 
738.67584201714, 287.6655746546093, 228.0238375243127, 7.052837735914259], 
"eval_len": [11, 15, 36, 13, 18, 13, 278, 122, 128, 31]}

 97%|█████████▋| 969999/1000000 [11:05:01<15:51, 31.54it/s]global step 970000, trans_decision ep_re 496.09471713531804

{"global_step": 970000, "eval_re": [838.1600104796811, 1.1641331745273085, 
5.766792825304704, 7.555812812768521, 2.158088418645213, 833.916473783912, 
1817.9708758402467, 1430.1871340386003, 6.88847699470134, 17.179372984792384], 
"eval_len": [273, 16, 31, 19, 13, 266, 544, 454, 19, 37]}

 98%|█████████▊| 979997/1000000 [11:12:10<10:31, 31.66it/s]global step 980000, trans_decision ep_re 388.38838442286595

{"global_step": 980000, "eval_re": [14.994257800110239, 0.8706323658457897, 
-1.0450507838874132, 1.590026338995765, 9.10429707953795, 5.74367318710605, 
1297.978290777854, 2533.19243179977, 4.066112669297904, 17.389172994029046], 
"eval_len": [39, 18, 19, 13, 33, 18, 362, 729, 29, 34]}

 99%|█████████▉| 989997/1000000 [11:18:56<05:17, 31.49it/s]global step 990000, trans_decision ep_re 243.89176158087585

{"global_step": 990000, "eval_re": [1755.0233299296178, 234.02719425861164, 
17.68542457554937, 260.08995136967843, 58.93604416675633, 3.784230472935141, 
65.94719210473137, 28.3596370450562, 18.17632797805862, -3.111716092236255], 
"eval_len": [537, 235, 39, 149, 81, 28, 98, 57, 33, 15]}

100%|█████████▉| 999999/1000000 [11:26:00<00:00, 31.49it/s]global step 1000000, trans_decision ep_re 263.97194062329083

{"global_step": 1000000, "eval_re": [132.70604027258597, 1138.186543688384, 
1.5443236735341817, 39.28698560611608, 2.847470820876636, 2.0873264678010797, 
197.36866053750526, 2.1583804537055227, 760.587977403676, 362.94569730872337], 
"eval_len": [65, 336, 20, 133, 12, 16, 100, 17, 278, 158]}

100%|██████████| 1000000/1000000 [11:26:02<00:00, 24.29it/s]
