
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.2
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [05:10<12:37:43, 21.78it/s]global step 10000, trans_decision ep_re 18.364658386405058

{"global_step": 10000, "eval_re": [5.3924589419546525, 14.622828287010522, 
4.518048419847924, 5.648160865745842, 109.77390307422084, 15.55980887325114, 
4.66628622182933, -0.21589814670527102, 19.6877619969728, 3.9932253299227813], 
"eval_len": [25, 37, 17, 20, 136, 27, 15, 23, 30, 18]}

  2%|▏         | 19999/1000000 [15:31<12:32:56, 21.69it/s]global step 20000, trans_decision ep_re 11.750007696624614

{"global_step": 20000, "eval_re": [13.496515938247102, 14.722371176096956, 
38.465060927241694, 9.99457028442293, 8.315093575358608, 2.5385892800801373, 
17.73770360342691, 5.833148038181677, 1.288702203290161, 5.108321939899968], 
"eval_len": [36, 31, 69, 24, 20, 20, 41, 18, 16, 32]}

  3%|▎         | 29998/1000000 [25:52<12:21:10, 21.81it/s]global step 30000, trans_decision ep_re 6.424521754159241

{"global_step": 30000, "eval_re": [5.670466099188931, 12.10215981546605, 
-2.1712061100721622, 0.9448099010083538, 6.953954879526463, 19.893064369601934, 
3.969411842963461, 7.729794868477374, 5.9066485330642235, 3.2461133423677855], 
"eval_len": [18, 27, 17, 12, 40, 36, 17, 20, 16, 19]}

  4%|▍         | 39997/1000000 [36:30<12:23:02, 21.53it/s]global step 40000, trans_decision ep_re 6.07335808535076

{"global_step": 40000, "eval_re": [11.411648151404355, 8.744187995066891, 
6.638630315348574, 2.309857447397023, 11.506966676744518, 4.33124362481608, 
0.8866443816819815, 0.4821236675190619, 2.5573242820355415, 11.86495431149358], 
"eval_len": [23, 29, 27, 19, 31, 33, 17, 10, 17, 27]}

  5%|▍         | 49997/1000000 [46:50<12:04:29, 21.85it/s]global step 50000, trans_decision ep_re 16.669977300168462

{"global_step": 50000, "eval_re": [11.478499732663387, 18.804369838605297, 
-0.1384001931715994, 4.902154090112797, -3.780245450487373, 4.773746972077896, 
23.280774513822866, -1.3674384761847924, -2.6392873959045238, 
111.38559937015066], "eval_len": [35, 43, 15, 19, 26, 17, 41, 17, 27, 108]}

  6%|▌         | 59999/1000000 [57:10<12:07:18, 21.54it/s]global step 60000, trans_decision ep_re 5.083644598499381

{"global_step": 60000, "eval_re": [3.8477273015992193, 4.19254329877895, 
5.886447621100085, 16.2925611019812, 4.571872702346844, 3.8570565338387555, 
10.259961108035139, 5.896957387621638, -4.3842414907486384, 0.4155604204406256],
"eval_len": [23, 18, 16, 39, 22, 20, 27, 22, 26, 24]}

  7%|▋         | 69999/1000000 [1:07:30<11:57:13, 21.61it/s]global step 70000, trans_decision ep_re 4.658050686976259

{"global_step": 70000, "eval_re": [3.3464643090358996, 10.832653754298365, 
3.1507684108785585, 6.902115063612468, 3.792453852367994, 2.1428849900015012, 
9.242732969515911, -3.761670129368037, 6.10436690874084, 4.827736740679096], 
"eval_len": [13, 27, 18, 19, 26, 15, 30, 21, 18, 17]}

  8%|▊         | 79999/1000000 [1:17:50<11:45:29, 21.73it/s]global step 80000, trans_decision ep_re 6.815091103795962

{"global_step": 80000, "eval_re": [10.4203237790894, 6.271354661782658, 
16.57535321738084, 5.305604393400655, 3.9613088001079704, 2.1517028669383556, 
4.735886809562437, 7.844363433645568, 5.95564355919545, 4.929369516856279], 
"eval_len": [27, 25, 32, 16, 14, 25, 16, 23, 18, 18]}

  9%|▉         | 89999/1000000 [1:28:10<11:40:00, 21.67it/s]global step 90000, trans_decision ep_re 34.50391093975521

{"global_step": 90000, "eval_re": [-3.8795592624199244, 6.057058581660943, 
283.6480252874589, 14.02472841261009, 14.469373659227266, 2.984688811580293, 
8.463344144951817, -0.3757650116685907, 17.813662012536113, 1.833552761615173], 
"eval_len": [12, 30, 142, 27, 28, 17, 18, 27, 44, 21]}

 10%|▉         | 99999/1000000 [1:38:20<11:31:40, 21.69it/s]global step 100000, trans_decision ep_re 9.907438103273886

{"global_step": 100000, "eval_re": [19.280574991391294, 6.221948054857994, 
4.394460409884438, 5.432588928187196, 13.676021908224483, 11.56009065385218, 
5.842621242190614, 10.378298672777326, 7.626979856390674, 14.660796314982642], 
"eval_len": [34, 29, 23, 19, 30, 31, 16, 28, 25, 35]}

 11%|█         | 109999/1000000 [1:48:50<11:22:50, 21.72it/s]global step 110000, trans_decision ep_re 15.503007963015008

{"global_step": 110000, "eval_re": [-0.5077794825921718, 1.8304208246141864, 
3.4293562414425947, 98.94940461801865, 4.974915726427682, -0.824187903737597, 
8.606274490566477, 14.655854658516752, -0.8283394569335273, 24.744159913827048],
"eval_len": [25, 28, 19, 103, 29, 15, 22, 33, 13, 46]}

 12%|█▏        | 119999/1000000 [1:59:10<11:14:53, 21.73it/s]global step 120000, trans_decision ep_re 32.62747059144276

{"global_step": 120000, "eval_re": [3.0936097747782254, 6.998734352497855, 
276.82775505205456, 8.13958227742051, 0.7253955986294841, 8.061921695194156, 
2.162961465110804, 3.6873285845083816, 6.152996430228659, 10.42442068400501], 
"eval_len": [17, 17, 151, 23, 21, 32, 28, 14, 29, 44]}

 13%|█▎        | 129997/1000000 [2:09:30<11:09:02, 21.67it/s]global step 130000, trans_decision ep_re 32.94149815850803

{"global_step": 130000, "eval_re": [-2.379492595880487, 4.0397250819843835, 
5.721540116881749, 16.944992780570985, 0.9137061701502831, 6.26823589341631, 
10.632107027061735, 8.185733208575495, 260.31909538582255, 18.769338516497314], 
"eval_len": [17, 22, 15, 26, 23, 36, 26, 26, 151, 28]}

 14%|█▍        | 139998/1000000 [2:19:50<10:53:26, 21.94it/s]global step 140000, trans_decision ep_re 21.796087361760126

{"global_step": 140000, "eval_re": [8.093156639750188, 8.158745489551702, 
4.037392587534281, 10.184847261410017, 173.72083318740343, 2.6845671351577853, 
6.437176159556427, 3.729148713939833, -2.8392491297402986, 3.754255573037915], 
"eval_len": [27, 19, 13, 34, 136, 13, 16, 16, 35, 20]}

 15%|█▍        | 149998/1000000 [2:29:52<10:47:34, 21.88it/s]global step 150000, trans_decision ep_re 8.272440496485881

{"global_step": 150000, "eval_re": [2.1532495669460983, 4.434448884870842, 
4.773974051139127, 3.762544207789188, 32.73462069868804, 0.9961978039897994, 
9.97998429790174, 5.839107966329475, 9.273114997852527, 8.777162489351985], 
"eval_len": [13, 20, 17, 34, 47, 12, 21, 27, 34, 17]}

 16%|█▌        | 159999/1000000 [2:40:20<10:43:39, 21.75it/s]global step 160000, trans_decision ep_re 27.45216967592931

{"global_step": 160000, "eval_re": [8.933294756304496, 169.50854690386058, 
29.40256031219035, 11.695057250147206, 0.45621749561971736, 4.30410843263952, 
10.742538505987346, 9.859712659146242, 1.3420625696681536, 28.277597873729537], 
"eval_len": [18, 100, 51, 39, 12, 23, 20, 24, 18, 52]}

 17%|█▋        | 169999/1000000 [2:50:40<10:35:41, 21.76it/s]global step 170000, trans_decision ep_re 33.1114787479918

{"global_step": 170000, "eval_re": [5.787079654401209, 2.02343980732318, 
256.0316661098869, 0.20891153250263056, 7.663808077674993, 10.772075209720134, 
6.1503432918335985, 18.616973035623214, 6.157735817642205, 17.702754943309902], 
"eval_len": [24, 15, 149, 11, 33, 25, 29, 43, 24, 28]}

 18%|█▊        | 179997/1000000 [3:01:00<10:32:14, 21.62it/s]global step 180000, trans_decision ep_re 9.534300483456033

{"global_step": 180000, "eval_re": [-1.9512729332039078, 4.622837729913521, 
9.355846974569253, 12.122717174802263, 19.16310838851837, 7.272759012710307, 
5.330651856062125, 3.2089951101842114, 15.77054882513578, 20.44681269586842], 
"eval_len": [26, 19, 29, 33, 33, 18, 25, 18, 31, 35]}

 19%|█▉        | 189997/1000000 [3:11:20<10:20:32, 21.76it/s]global step 190000, trans_decision ep_re 7.010379503663769

{"global_step": 190000, "eval_re": [5.734555934803446, 8.539225691337796, 
13.346015166893244, 14.354892932329506, 3.552928461726614, 1.0000032285492855, 
13.987028639374994, 4.44188048149378, 3.027918422175146, 2.1193460779538804], 
"eval_len": [17, 24, 29, 44, 25, 17, 30, 21, 24, 17]}

 20%|█▉        | 199999/1000000 [3:21:21<10:10:50, 21.83it/s]global step 200000, trans_decision ep_re 36.19568211514436

{"global_step": 200000, "eval_re": [194.20299913352179, -1.7711122072342638, 
8.165098208549558, 0.9791511306413567, 5.897048658424034, 1.0372991076639044, 
4.756760330047007, 4.267132925624934, 141.5432075572677, 2.8792363069376234], 
"eval_len": [135, 17, 34, 22, 18, 30, 22, 19, 112, 16]}

 21%|██        | 209999/1000000 [3:31:50<10:06:31, 21.71it/s]global step 210000, trans_decision ep_re 6.901342347543583

{"global_step": 210000, "eval_re": [6.159194437999261, 0.3599248802130309, 
4.439997339361126, 0.3431841072908162, 5.052025688860056, 2.1739046363884382, 
8.285363613037319, 12.432264738887794, 17.12073051695602, 12.646833516441978], 
"eval_len": [18, 29, 18, 10, 16, 26, 20, 31, 27, 30]}

 22%|██▏       | 219999/1000000 [3:42:10<9:52:59, 21.92it/s]global step 220000, trans_decision ep_re 32.048227228887555

{"global_step": 220000, "eval_re": [5.860064557689361, -2.5099943406993916, 
18.416572677600698, 5.0326568419325275, 254.85373430496392, 11.283781655823258, 
7.3503285094347035, 7.0448323894576514, 3.8317360052331226, 9.318559687439691], 
"eval_len": [23, 18, 30, 25, 157, 24, 29, 17, 16, 20]}

 23%|██▎       | 229999/1000000 [3:52:10<9:43:53, 21.98it/s]global step 230000, trans_decision ep_re 9.622829575079521

{"global_step": 230000, "eval_re": [12.037588127192128, 8.009310348975077, 
18.537174220603365, 16.700919606410377, -4.85789420259175, 7.042271166536178, 
-0.03476052348079672, 14.103234679250445, 11.07259585388011, 
13.617856474020067], "eval_len": [23, 28, 43, 31, 31, 18, 15, 34, 23, 42]}

 24%|██▍       | 239997/1000000 [4:02:40<9:37:18, 21.94it/s]global step 240000, trans_decision ep_re 3.349811034072073

{"global_step": 240000, "eval_re": [4.7171310756722695, -0.45760143794897556, 
0.17168169059770555, 3.42094939999954, 2.0826677746646403, 3.376392327305013, 
0.5645546062763964, 8.20124083868213, 2.488031115426565, 8.93306295004544], 
"eval_len": [16, 11, 14, 31, 29, 37, 19, 33, 29, 22]}

 25%|██▍       | 249999/1000000 [4:12:50<9:30:07, 21.92it/s]global step 250000, trans_decision ep_re 6.643174039835512

{"global_step": 250000, "eval_re": [13.471824677052984, 6.697164774005915, 
-0.014609394581550472, 17.356622872899532, 11.298601545126639, 
2.994073356183258, 13.787071553807273, -6.55954057792892, 2.6794398158169455, 
4.721091775973039], "eval_len": [28, 23, 18, 29, 32, 32, 49, 24, 18, 19]}

 26%|██▌       | 259998/1000000 [4:22:52<9:15:52, 22.19it/s]global step 260000, trans_decision ep_re 10.394798485645497

{"global_step": 260000, "eval_re": [10.079230420431234, 11.96286872955404, 
10.371976410010014, 4.913491171574568, -0.4089557169767453, 4.067757860774095, 
3.24946410167232, 31.411042279742997, 2.9552920717038416, 25.34581752796861], 
"eval_len": [37, 24, 23, 22, 12, 14, 22, 51, 16, 36]}

 27%|██▋       | 269999/1000000 [4:33:20<9:12:41, 22.01it/s]global step 270000, trans_decision ep_re 10.081867215162285

{"global_step": 270000, "eval_re": [9.576236228538926, 14.823460943905584, 
25.58431695178922, 5.065989649831349, 8.973256052375806, 15.045823316393655, 
2.989196598660219, 5.900581423688873, 0.5805102651890064, 12.279300721250205], 
"eval_len": [22, 27, 42, 28, 19, 34, 24, 30, 13, 23]}

 28%|██▊       | 279998/1000000 [4:43:21<8:59:24, 22.25it/s]global step 280000, trans_decision ep_re 18.352784252905483

{"global_step": 280000, "eval_re": [9.66109674896937, 8.697005122844772, 
6.458265578285277, 14.062138283450587, 1.9565353180066987, 123.88245230275716, 
2.951055818883229, 12.480391054147647, -1.4855411524908217, 4.8644434542008925],
"eval_len": [23, 26, 17, 26, 15, 103, 16, 43, 28, 21]}

 29%|██▉       | 289999/1000000 [4:53:50<9:00:39, 21.89it/s]global step 290000, trans_decision ep_re 10.234067431038184

{"global_step": 290000, "eval_re": [10.845753368576108, 16.319945842501397, 
-1.282670223666519, 11.358430115258065, 6.539953751854683, 32.08064657234106, 
9.710812852722459, 1.081407802416598, 3.18150733354777, 12.504886894830221], 
"eval_len": [22, 32, 26, 32, 18, 46, 30, 32, 14, 24]}

 30%|██▉       | 299999/1000000 [5:04:00<8:55:02, 21.81it/s]global step 300000, trans_decision ep_re 73.77888247245477

{"global_step": 300000, "eval_re": [4.410317036279237, 3.515726115459455, 
5.177584834261627, 2.2257696631223647, 415.00596836376275, 1.4059809129075975, 
2.0684253234432437, 5.123604801411361, 290.3856595309913, 8.46978814290863], 
"eval_len": [19, 17, 20, 14, 279, 27, 16, 18, 176, 25]}

 31%|███       | 309999/1000000 [5:14:20<8:48:53, 21.74it/s]global step 310000, trans_decision ep_re 26.457663541482777

{"global_step": 310000, "eval_re": [23.643351674397568, 14.934050439844189, 
7.050742541984393, 5.882807694980046, 180.40252245613846, 9.377922763083008, 
6.122629909334213, 6.866065963391876, 3.4476723554233235, 6.848869616250658], 
"eval_len": [37, 35, 30, 26, 141, 27, 24, 32, 15, 18]}

 32%|███▏      | 319999/1000000 [5:24:30<8:37:43, 21.89it/s]global step 320000, trans_decision ep_re 19.284149475018182

{"global_step": 320000, "eval_re": [13.126086954160435, 7.6460486397767955, 
9.433496623898858, 14.317207129652603, 22.086842816580017, 1.9879331387213963, 
116.13254562639003, 1.2142464028286024, -1.8161548580848816, 8.713242276257986],
"eval_len": [23, 20, 23, 39, 31, 13, 94, 33, 11, 27]}

 33%|███▎      | 329997/1000000 [5:34:50<8:29:25, 21.92it/s]global step 330000, trans_decision ep_re 8.827105826844473

{"global_step": 330000, "eval_re": [3.500628646286578, 8.780971719320686, 
19.986649427564668, 13.336630448198607, 7.777889624507737, 2.1493812872564, 
10.146848923560128, 11.813775642641483, 2.724955899141159, 8.053326649967293], 
"eval_len": [20, 20, 48, 24, 34, 21, 36, 23, 13, 19]}

 34%|███▍      | 339999/1000000 [5:45:00<8:22:08, 21.91it/s]global step 340000, trans_decision ep_re 6.271792800584363

{"global_step": 340000, "eval_re": [5.035126910305104, 2.0391154602016885, 
3.5075099108368404, 5.239730604476633, 8.870414048950755, 8.5620358588808, 
5.9747118643744965, 9.686728663585924, 5.75482805133343, 8.047726632897966], 
"eval_len": [17, 21, 14, 17, 17, 23, 33, 26, 18, 22]}

 35%|███▍      | 349998/1000000 [5:55:01<8:11:16, 22.05it/s]global step 350000, trans_decision ep_re 8.536912652437502

{"global_step": 350000, "eval_re": [2.98093020736426, 4.815494034128401, 
16.09307091596509, 5.501930723556379, -3.542729386594825, 11.890541097518014, 
4.340851245933392, 12.421239170110228, 17.98960036914901, 12.878198147245065], 
"eval_len": [15, 21, 25, 22, 26, 25, 18, 30, 31, 25]}

 36%|███▌      | 359999/1000000 [6:05:30<8:08:27, 21.84it/s]global step 360000, trans_decision ep_re 5.904614917872292

{"global_step": 360000, "eval_re": [2.6083597770150146, 13.73077190103908, 
-2.2868555899606196, 4.841201128862354, 10.503938016724591, 3.130258073201407, 
-1.6484200057771878, 4.87254578064446, 8.821784689642561, 14.47256540733126], 
"eval_len": [15, 32, 17, 19, 27, 18, 25, 18, 19, 42]}

 37%|███▋      | 369999/1000000 [6:15:40<7:58:44, 21.93it/s]global step 370000, trans_decision ep_re 20.309567082263282

{"global_step": 370000, "eval_re": [4.281423285274449, 4.379268622414329, 
8.715680201127176, 6.078283526423992, 18.78548032517034, 9.045246655111699, 
6.958097086311998, 11.48642600680505, 129.84332826460889, 3.522436849384918], 
"eval_len": [15, 26, 23, 21, 46, 23, 25, 29, 99, 14]}

 38%|███▊      | 379999/1000000 [6:26:00<7:50:46, 21.95it/s]global step 380000, trans_decision ep_re 26.370556943467022

{"global_step": 380000, "eval_re": [-0.47323924055849564, 14.720845210822128, 
7.442661346009453, 203.06989310760764, 2.4418208030515998, 5.184989867213796, 
-1.1486375059906107, 7.187142961450223, 21.726038145284235, 3.554054739780238], 
"eval_len": [30, 25, 20, 125, 13, 25, 18, 17, 37, 15]}

 39%|███▉      | 389999/1000000 [6:36:10<7:42:22, 21.99it/s]global step 390000, trans_decision ep_re 4.521883282525333

{"global_step": 390000, "eval_re": [7.299770154027675, 4.215009120286497, 
6.082225072402372, 3.81856147444538, 10.807389702559284, 4.032956517118835, 
-0.10252694516293731, 2.067496579702207, 6.961815481244973, 
0.03613566862903811], "eval_len": [39, 29, 18, 32, 21, 28, 25, 21, 32, 18]}

 40%|███▉      | 399997/1000000 [6:46:30<7:34:29, 22.00it/s]global step 400000, trans_decision ep_re 6.904513402767263

{"global_step": 400000, "eval_re": [7.090197429410967, 16.088798182852813, 
4.063737837112184, 15.251355784296397, 6.346033218148842, 5.312538439810477, 
13.568080391695734, 2.755152944238921, -1.0727738385255787, 
-0.3579863613681242], "eval_len": [16, 33, 26, 28, 25, 19, 27, 32, 29, 11]}

 41%|████      | 409999/1000000 [6:56:40<7:32:12, 21.74it/s]global step 410000, trans_decision ep_re 5.909551642539823

{"global_step": 410000, "eval_re": [1.1263134080077126, 3.5302191848527134, 
10.994937554475028, 2.3869355357599957, 3.6094139537002152, 4.168582692775834, 
10.873306768963063, 0.2782180892932462, 4.732448139503422, 17.395141098066993], 
"eval_len": [32, 19, 22, 18, 18, 21, 27, 18, 36, 33]}

 42%|████▏     | 419998/1000000 [7:06:42<7:16:08, 22.16it/s]global step 420000, trans_decision ep_re 12.259846718762365

{"global_step": 420000, "eval_re": [51.27057113701476, 25.4016669829363, 
7.4820671478159975, 9.282433441633149, 2.067596326951763, 0.5657891456305271, 
16.2639430658672, -1.2868295337646436, 2.655228827400236, 8.89600064613838], 
"eval_len": [101, 33, 24, 20, 31, 21, 37, 10, 16, 35]}

 43%|████▎     | 429999/1000000 [7:17:10<7:14:44, 21.85it/s]global step 430000, trans_decision ep_re 24.546871916582194

{"global_step": 430000, "eval_re": [5.559908691124935, 2.0972758127974878, 
9.90334827504384, 12.711781483802206, 5.022863398397689, 5.755518637827588, 
18.23131813437069, 4.458159184418462, 182.67435408221033, -0.9458085341712911], 
"eval_len": [22, 43, 28, 28, 34, 20, 47, 23, 95, 17]}

 44%|████▍     | 439999/1000000 [7:27:11<7:04:54, 21.97it/s]global step 440000, trans_decision ep_re 8.629454145395036

{"global_step": 440000, "eval_re": [8.795902988963057, 3.052806488640157, 
3.9629967808357462, 1.131665976282886, 13.049766918783218, 6.869118861902318, 
13.79697223328396, 11.979903706656888, 2.434170910542739, 21.2212365880594], 
"eval_len": [20, 20, 39, 20, 42, 19, 33, 25, 14, 32]}

 45%|████▍     | 449999/1000000 [7:37:40<6:59:28, 21.85it/s]global step 450000, trans_decision ep_re 9.977926318040378

{"global_step": 450000, "eval_re": [10.301211100467196, 13.213187313449653, 
4.573623094647433, 15.823541602676576, 15.992792163031599, 12.049077937716621, 
10.071643921691225, 2.252778692269999, 9.877144751147524, 5.624262603305963], 
"eval_len": [22, 33, 17, 41, 29, 23, 23, 13, 35, 24]}

 46%|████▌     | 459999/1000000 [7:47:40<6:49:43, 21.97it/s]global step 460000, trans_decision ep_re 32.73516832976686

{"global_step": 460000, "eval_re": [15.456334785117232, 233.69538180389455, 
9.396785294355189, 5.324530057992151, 12.008688096129621, 14.44523358863756, 
15.72449066237722, 15.246953667683448, 0.2524739780151547, 5.80081136346651], 
"eval_len": [27, 163, 23, 21, 32, 36, 47, 33, 22, 21]}

 47%|████▋     | 469999/1000000 [7:58:10<6:41:20, 22.01it/s]global step 470000, trans_decision ep_re 20.365897728870575

{"global_step": 470000, "eval_re": [17.27910527263046, 5.65076031458627, 
31.412187905148766, 4.274141041127592, 12.63471240136006, 95.47873697340445, 
11.163042535679645, 4.182761327935666, 10.699881032233076, 10.883648484599767], 
"eval_len": [29, 19, 52, 20, 28, 102, 25, 27, 38, 21]}

 48%|████▊     | 479999/1000000 [8:08:10<6:37:00, 21.83it/s]global step 480000, trans_decision ep_re 7.57040386206293

{"global_step": 480000, "eval_re": [-2.4797898497197193, -0.41216947607763005, 
4.462882775760891, 13.394364872777015, 19.241341240651607, 12.119088201061697, 
6.917205085747435, 13.306373786354833, 2.3747253526018754, 6.780016631471302], 
"eval_len": [17, 15, 16, 30, 46, 34, 24, 44, 26, 19]}

 49%|████▉     | 489999/1000000 [8:18:40<6:30:17, 21.78it/s]global step 490000, trans_decision ep_re 29.957218867480783

{"global_step": 490000, "eval_re": [2.525758859572602, 17.51980227828061, 
232.35245308167438, 11.976490544817848, 4.59189883087043, 7.954855878459277, 
10.994161990504523, 9.268730174679124, 2.8331532724041946, 
-0.44511623645517007], "eval_len": [13, 44, 168, 26, 15, 30, 27, 23, 22, 13]}

 50%|████▉     | 499999/1000000 [8:28:40<6:19:55, 21.93it/s]global step 500000, trans_decision ep_re 25.16918813816712

{"global_step": 500000, "eval_re": [-0.4116882986617428, 4.4731959093843585, 
9.62591856905855, -1.8717249626866894, 2.9591067257994466, 7.230386430317397, 
11.794428917337443, 218.91282526464394, 0.011035536348220437, 
-1.0316027098697609], "eval_len": [16, 15, 23, 22, 19, 23, 29, 155, 21, 23]}

 51%|█████     | 509999/1000000 [8:39:10<6:14:08, 21.83it/s]global step 510000, trans_decision ep_re 3.9267983515085243

{"global_step": 510000, "eval_re": [1.544510894785035, -3.6774422110236844, 
9.405222635826094, 0.8084370848394661, -2.014087245604572, 9.163407662107693, 
11.769078493195309, 8.819475639033083, 4.108517223931507, -0.6591366620046846], 
"eval_len": [25, 20, 18, 13, 12, 36, 25, 22, 14, 24]}

 52%|█████▏    | 519999/1000000 [8:49:20<6:04:35, 21.94it/s]global step 520000, trans_decision ep_re 24.508837675129442

{"global_step": 520000, "eval_re": [187.30480019091465, 9.233926530356698, 
0.8665637218322216, 4.467435173743824, 4.182581652635902, 6.175259033314021, 
5.650322320420825, 17.93308965572341, 7.711337354616456, 1.5630611177363585], 
"eval_len": [112, 30, 22, 16, 17, 19, 21, 36, 36, 27]}

 53%|█████▎    | 529997/1000000 [8:59:40<5:54:36, 22.09it/s]global step 530000, trans_decision ep_re 7.075017616219384

{"global_step": 530000, "eval_re": [8.733816811566875, 4.419548082203892, 
19.82777177242014, 0.9750382355909638, 2.436043224846027, 2.237738280245453, 
11.867626413712289, 3.402916331386955, 11.084856130456213, 5.7648208797650256], 
"eval_len": [24, 15, 35, 15, 14, 37, 23, 24, 23, 28]}

 54%|█████▍    | 539999/1000000 [9:09:50<5:49:37, 21.93it/s]global step 540000, trans_decision ep_re 6.55286406486905

{"global_step": 540000, "eval_re": [9.995816309791813, 2.1329646635757014, 
8.918870528544273, 7.6428696727565715, 10.076592801292282, 9.49839617647326, 
8.0748551598923, 1.434250164756307, 3.1438075486734003, 4.610217622934588], 
"eval_len": [22, 11, 30, 24, 25, 26, 27, 16, 21, 18]}

 55%|█████▍    | 549998/1000000 [9:19:53<5:40:23, 22.03it/s]global step 550000, trans_decision ep_re 7.03709343322514

{"global_step": 550000, "eval_re": [14.399566732882697, 7.210636946646956, 
5.272608752165158, 7.188982016031044, 2.4000503549582084, 1.724971208788544, 
3.4059626395100686, 13.841827671181463, 11.087851479997004, 3.838476530090243], 
"eval_len": [27, 26, 20, 22, 31, 14, 15, 29, 22, 15]}

 56%|█████▌    | 559999/1000000 [9:30:20<5:33:01, 22.02it/s]global step 560000, trans_decision ep_re 4.413126591957826

{"global_step": 560000, "eval_re": [11.579016617498624, -1.3276193570622636, 
5.081037588967178, 5.3858306819442845, 9.059812568335508, 2.8930248903277684, 
-4.403754998007435, 7.294108670828854, 7.1873404597697865, 1.3824687969759664], 
"eval_len": [29, 17, 25, 25, 32, 21, 27, 20, 18, 23]}

 57%|█████▋    | 569998/1000000 [9:40:22<5:24:04, 22.11it/s]global step 570000, trans_decision ep_re 9.948711206143843

{"global_step": 570000, "eval_re": [25.98325879906893, 8.793260881586823, 
6.034359475988135, 0.4409492489003319, -1.4239339430235576, 10.451866506004114, 
22.88976248754131, 15.41269632856415, 12.83537963216962, -1.930487355361433], 
"eval_len": [36, 24, 21, 12, 11, 19, 48, 24, 33, 25]}

 58%|█████▊    | 579999/1000000 [9:50:50<5:19:25, 21.91it/s]global step 580000, trans_decision ep_re 28.732857925271908

{"global_step": 580000, "eval_re": [15.405391241023807, 6.476293566907635, 
6.892510277771844, 2.5981090981309944, 3.070688026663384, 215.575898359851, 
11.975857026786082, 2.7921243534978126, 15.406089165615157, 7.135618136471364], 
"eval_len": [22, 17, 21, 19, 13, 149, 31, 20, 24, 27]}

 59%|█████▉    | 589998/1000000 [10:00:52<5:09:50, 22.05it/s]global step 590000, trans_decision ep_re 36.011649592306966

{"global_step": 590000, "eval_re": [9.587879364398633, 145.35372948652798, 
8.587038019944117, 11.786171035337778, 8.018352057615658, 1.9006866165938616, 
7.418752391057993, 0.9176204451524073, 159.21903471915192, 7.3272317872892865], 
"eval_len": [33, 98, 30, 28, 22, 15, 22, 18, 76, 34]}

 60%|█████▉    | 599999/1000000 [10:11:20<5:06:22, 21.76it/s]global step 600000, trans_decision ep_re 10.73400094550706

{"global_step": 600000, "eval_re": [6.90861041162942, 6.7502362959916224, 
4.600192933505036, 23.579914693934313, 2.5823462975997757, 19.99325424561061, 
3.602017429606019, 20.474888160295173, 8.04277937934243, 10.8057696075562], 
"eval_len": [19, 18, 16, 53, 13, 30, 14, 30, 29, 30]}

 61%|██████    | 609998/1000000 [10:21:22<4:53:37, 22.14it/s]global step 610000, trans_decision ep_re 29.034771253554247

{"global_step": 610000, "eval_re": [10.999006138754165, 1.1400152923788036, 
226.70652108468025, 19.13954560130277, 0.1089512860393157, 13.502532860900041, 
15.060323024967545, 2.435547391058213, -1.0508254440439826, 2.3060952995054014],
"eval_len": [34, 27, 265, 29, 11, 37, 26, 26, 10, 14]}

 62%|██████▏   | 619999/1000000 [10:31:50<4:48:05, 21.98it/s]global step 620000, trans_decision ep_re 5.830541241972894

{"global_step": 620000, "eval_re": [2.4293891994592554, 6.52121233613186, 
3.005186405898233, 5.385623032155127, 5.354757153327889, 7.322465107419178, 
6.622941360220559, 14.134656662510936, 2.2566588826791443, 5.272522279926753], 
"eval_len": [13, 24, 25, 33, 28, 32, 23, 35, 16, 21]}

 63%|██████▎   | 629998/1000000 [10:41:52<4:39:49, 22.04it/s]global step 630000, trans_decision ep_re 3.936216818063817

{"global_step": 630000, "eval_re": [0.5830309333720267, 5.573497469311129, 
1.28504490164466, 7.22963874508457, 0.4603561741999706, 5.57305224368371, 
7.689485403982197, 6.629441662140846, 2.9947404325933595, 1.3438802146257074], 
"eval_len": [15, 30, 20, 29, 23, 27, 19, 20, 18, 21]}

 64%|██████▍   | 639999/1000000 [10:52:20<4:35:43, 21.76it/s]global step 640000, trans_decision ep_re 18.131977003530643

{"global_step": 640000, "eval_re": [3.151262792434311, 0.5699658295813842, 
5.50575407988672, -0.3288838674847563, 147.9376813396621, 4.472285231224098, 
4.934421162547814, 0.8321039039009471, 5.351572018006094, 8.89360754554771], 
"eval_len": [33, 15, 21, 20, 111, 25, 28, 17, 19, 26]}

 65%|██████▍   | 649998/1000000 [11:02:22<4:23:40, 22.12it/s]global step 650000, trans_decision ep_re 25.456475808973487

{"global_step": 650000, "eval_re": [211.23260171592383, 8.988850520396356, 
7.806236763867205, 11.586660262770197, 3.03721985141458, -0.2653358449884978, 
-0.3594017665048304, 6.585413584474249, 4.587172381251473, 1.3653406211302985], 
"eval_len": [157, 20, 44, 22, 19, 16, 23, 18, 22, 24]}

 66%|██████▌   | 659999/1000000 [11:12:50<4:19:39, 21.82it/s]global step 660000, trans_decision ep_re 4.140023931634868

{"global_step": 660000, "eval_re": [8.22503861464737, 5.931250718255493, 
1.615931680054244, 4.007337496563199, 0.2933459960310635, 10.203101648363772, 
0.6672225349367733, 2.064891320732354, 6.619082525919882, 1.7730367808445302], 
"eval_len": [19, 20, 15, 18, 18, 26, 12, 18, 20, 17]}

 67%|██████▋   | 669998/1000000 [11:22:53<4:10:12, 21.98it/s]global step 670000, trans_decision ep_re 44.76100703194336

{"global_step": 670000, "eval_re": [7.692641682784706, 7.625267609906787, 
1.3572212997521493, 1.939973681785995, 3.7399790851974157, 3.7887096109036826, 
405.56048478754684, 1.1522652900818846, 3.276373995088587, 11.477153276385591], 
"eval_len": [19, 20, 21, 11, 19, 33, 218, 20, 16, 24]}

 68%|██████▊   | 679999/1000000 [11:33:20<4:03:24, 21.91it/s]global step 680000, trans_decision ep_re 23.586045171303777

{"global_step": 680000, "eval_re": [8.672739160634194, 11.597530019436698, 
1.8292986887035663, 7.6134728664056786, 21.141323710138877, 157.27643137329616, 
0.1897470199003247, 10.871994446945749, 1.9459077861783938, 14.722006641398098],
"eval_len": [23, 29, 17, 19, 52, 120, 23, 25, 15, 40]}

 69%|██████▉   | 689999/1000000 [11:43:40<3:57:03, 21.79it/s]global step 690000, trans_decision ep_re 5.248024561411146

{"global_step": 690000, "eval_re": [2.8851238150893312, 1.9934818479892975, 
13.507816140029798, 3.823156234846099, 0.5423471945080136, 3.1775146297492176, 
1.0911715690603214, 5.4625486831572, 8.191899019831261, 11.805186479850915], 
"eval_len": [16, 17, 32, 29, 11, 15, 12, 26, 35, 22]}

 70%|██████▉   | 699999/1000000 [11:53:50<3:48:50, 21.85it/s]global step 700000, trans_decision ep_re 8.13900629411012

{"global_step": 700000, "eval_re": [19.184144385669338, 22.629833827297944, 
-3.2995287389598063, 13.55082432251361, 3.4060571068146697, 4.654273304069422, 
0.405244072596942, 6.840573836749642, 8.088722085594002, 5.929918738755422], 
"eval_len": [36, 59, 19, 34, 28, 23, 16, 18, 20, 18]}

 71%|███████   | 709999/1000000 [12:04:10<3:40:57, 21.88it/s]global step 710000, trans_decision ep_re 12.155476989137629

{"global_step": 710000, "eval_re": [9.584019777070772, 6.131683135022454, 
34.09618107503382, 6.692632549759408, 10.25917182368698, 8.41022424053632, 
19.210785344735786, 11.229151641111388, 9.212377490131036, 6.728542814288321], 
"eval_len": [20, 16, 54, 20, 29, 26, 58, 25, 23, 19]}

 72%|███████▏  | 719999/1000000 [12:14:11<3:33:48, 21.83it/s]global step 720000, trans_decision ep_re 46.42164336830555

{"global_step": 720000, "eval_re": [12.360395755866994, 190.41682745806767, 
2.949199552927064, 4.285893384292578, 5.36148904150768, 9.231405687869508, 
6.732296210330018, 228.31073228425922, 1.3063735498267968, 3.261820758107984], 
"eval_len": [33, 113, 31, 15, 19, 24, 20, 134, 13, 19]}

 73%|███████▎  | 729999/1000000 [12:24:40<3:25:48, 21.87it/s]global step 730000, trans_decision ep_re 9.675967288983873

{"global_step": 730000, "eval_re": [8.341934503221463, 11.613504172968586, 
3.2582039477143248, 21.875479334402346, 26.01812336739261, 10.056974628371432, 
3.0650385923015815, -0.022019215619786467, 1.7002265242484211, 
10.852207034837747], "eval_len": [20, 35, 22, 36, 37, 21, 13, 13, 13, 22]}

 74%|███████▍  | 739998/1000000 [12:34:43<3:19:01, 21.77it/s]global step 740000, trans_decision ep_re 9.157191100158883

{"global_step": 740000, "eval_re": [16.070222329968374, 7.074114684993551, 
15.933575559355118, 3.735948346289353, 16.954101544707587, 13.481009343176224, 
-3.1077110525593206, 10.908955953161197, 9.47642249616606, 1.0452717963306908], 
"eval_len": [30, 21, 27, 15, 28, 87, 97, 28, 31, 20]}

 75%|███████▍  | 749999/1000000 [12:45:10<3:10:30, 21.87it/s]global step 750000, trans_decision ep_re 12.899592001707754

{"global_step": 750000, "eval_re": [7.129367279291841, 6.175619629182364, 
20.11310778419716, 5.804929883324737, 1.598419461370276, 6.9602159361924585, 
4.565989983480494, 57.81524859534628, 18.92772310516459, -0.09470164047267225], 
"eval_len": [23, 19, 30, 28, 28, 20, 26, 226, 30, 29]}

 76%|███████▌  | 759997/1000000 [12:55:30<3:03:29, 21.80it/s]global step 760000, trans_decision ep_re 51.78993827001766

{"global_step": 760000, "eval_re": [9.390282980713682, 3.5359016095999993, 
155.1956758065167, 10.786376245257502, 3.425735223381158, 2.142938267004582, 
1.7306370090317484, 13.22392294472782, 0.15117682196524682, 318.3167357919781], 
"eval_len": [21, 18, 115, 24, 28, 30, 20, 33, 17, 227]}

 77%|███████▋  | 769999/1000000 [13:05:31<2:54:34, 21.96it/s]global step 770000, trans_decision ep_re 15.449225018041744

{"global_step": 770000, "eval_re": [95.952698005626, 9.611724759927192, 
0.8020830303479143, 10.659503058844308, 10.629595107958325, -0.2009190325596819,
7.258217589125327, 11.9818395185988, 7.3927177045879535, 0.40479043796126957], 
"eval_len": [197, 22, 12, 23, 25, 25, 21, 35, 27, 16]}

 78%|███████▊  | 779999/1000000 [13:16:00<2:47:46, 21.86it/s]global step 780000, trans_decision ep_re 7.278137661567345

{"global_step": 780000, "eval_re": [2.8479251814647575, 10.219067917291456, 
6.86391308561705, 11.676237321898231, 4.921037330497018, 3.223038495746295, 
4.427120213461825, 12.072048538940694, 6.759631307142365, 9.771357223613764], 
"eval_len": [12, 27, 19, 25, 16, 24, 14, 29, 16, 24]}

 79%|███████▉  | 789998/1000000 [13:26:03<2:38:00, 22.15it/s]global step 790000, trans_decision ep_re 17.304283595789066

{"global_step": 790000, "eval_re": [7.7966318381269915, 1.0498786083758345, 
63.12756496817839, 3.060542148941233, 9.784866289611736, 66.49473047749647, 
12.61206768020328, 6.528646957078605, -1.265230934996293, 3.853137924874392], 
"eval_len": [35, 12, 98, 21, 26, 82, 25, 20, 23, 13]}

 80%|███████▉  | 799999/1000000 [13:36:30<2:31:42, 21.97it/s]global step 800000, trans_decision ep_re 8.444140108125392

{"global_step": 800000, "eval_re": [13.967986239801704, 6.140791901609178, 
25.39581858145495, 4.641236410480185, 5.730972246854517, 5.5654473221636565, 
5.077997177055729, 1.5838316439178477, -0.5493896759290109, 16.886709233845163],
"eval_len": [28, 20, 85, 21, 19, 18, 27, 16, 13, 32]}

 81%|████████  | 809998/1000000 [13:46:32<2:22:39, 22.20it/s]global step 810000, trans_decision ep_re 24.247951193639103

{"global_step": 810000, "eval_re": [6.314231525137609, 9.214700989004266, 
0.39000429663156083, 1.7825170230738834, 2.2298912959280575, 4.854671907198725, 
1.9629770535416036, 6.507216248594533, 206.94330218702163, 2.279999410259151], 
"eval_len": [26, 28, 15, 25, 16, 15, 16, 29, 158, 26]}

 82%|████████▏ | 819999/1000000 [13:57:00<2:15:15, 22.18it/s]global step 820000, trans_decision ep_re 25.133028573127206

{"global_step": 820000, "eval_re": [2.33128020111032, 6.612862005079209, 
9.189380154170863, 10.071924261098804, 7.213809467572923, 6.136365140638838, 
185.52473508702832, 7.6133532636198655, 7.462955727048534, 9.173620423904358], 
"eval_len": [14, 18, 21, 32, 27, 19, 119, 26, 24, 30]}

 83%|████████▎ | 829998/1000000 [14:07:02<2:08:27, 22.06it/s]global step 830000, trans_decision ep_re 4.706306017573801

{"global_step": 830000, "eval_re": [8.416374384051844, 4.656189785176671, 
5.981937712643781, 7.539402724603477, -0.4653538507812613, -0.09244264225478133,
2.650020609538924, 3.273335835213625, 16.8191555078423, -1.7155598902965685], 
"eval_len": [20, 24, 17, 19, 13, 17, 17, 14, 31, 22]}

 84%|████████▍ | 839999/1000000 [14:17:30<2:02:05, 21.84it/s]global step 840000, trans_decision ep_re 26.165244307249612

{"global_step": 840000, "eval_re": [-0.34260195755290557, 4.6218260395456205, 
3.527622306082484, 19.248731334118037, 1.8099774959445236, 0.34936078535199605, 
13.122985565584582, 211.95974408949965, 2.072499963334626, 5.2822974505874525], 
"eval_len": [15, 29, 27, 44, 15, 12, 34, 149, 14, 27]}

 85%|████████▍ | 849999/1000000 [14:27:40<1:53:04, 22.11it/s]global step 850000, trans_decision ep_re 10.656753747530676

{"global_step": 850000, "eval_re": [7.424007497689992, 0.18447024514979538, 
44.25557791675578, 4.13242644827843, 18.688929178905344, 17.668696383285106, 
-1.6455924432648081, -1.9595436749171429, 12.8429401389193, 4.9756257845049605],
"eval_len": [26, 13, 72, 15, 32, 31, 26, 21, 39, 22]}

 86%|████████▌ | 859997/1000000 [14:38:00<1:47:14, 21.76it/s]global step 860000, trans_decision ep_re 42.166810029632664

{"global_step": 860000, "eval_re": [8.265036930753327, 6.562377819359468, 
8.965516899471018, 12.115130396260515, 17.076368934635017, -0.8592657699099439, 
4.058749734616048, 10.571904394649687, 8.537607411190987, 346.37467354530054], 
"eval_len": [33, 29, 32, 35, 33, 17, 20, 24, 29, 205]}

 87%|████████▋ | 869999/1000000 [14:48:10<1:39:07, 21.86it/s]global step 870000, trans_decision ep_re 51.95545996628838

{"global_step": 870000, "eval_re": [197.9101115410284, 4.685295923415339, 
4.8459049258392595, 263.48212334045576, 7.319677230577964, 5.796229886512632, 
15.017810093678055, 7.854201499204578, 7.670955926593595, 4.972289295578156], 
"eval_len": [185, 28, 28, 143, 21, 20, 31, 25, 21, 20]}

 88%|████████▊ | 879997/1000000 [14:58:30<1:30:55, 22.00it/s]global step 880000, trans_decision ep_re 39.84418701379122

{"global_step": 880000, "eval_re": [0.23874142802387402, 155.70204360554237, 
18.846445963545513, 8.433120197257184, 14.75768763982391, 6.735236640620101, 
1.581034139558629, 11.327828793469578, 5.7021199548549095, 175.11761177521615], 
"eval_len": [24, 127, 30, 24, 42, 18, 27, 22, 21, 116]}

 89%|████████▉ | 889999/1000000 [15:08:40<1:23:46, 21.89it/s]global step 890000, trans_decision ep_re 8.786632160610951

{"global_step": 890000, "eval_re": [3.44411921723428, 9.485129187505386, 
15.200806756444317, 5.082548661972655, 7.563478689089142, 4.395885771431021, 
15.662176618847214, 17.695271263675547, 0.8717120193590877, 8.465193420550845], 
"eval_len": [37, 27, 30, 16, 26, 31, 35, 35, 26, 25]}

 90%|████████▉ | 899998/1000000 [15:18:43<1:15:26, 22.09it/s]global step 900000, trans_decision ep_re 28.5909313408411

{"global_step": 900000, "eval_re": [10.80630341131433, 7.245395292295229, 
233.582216965816, 4.568153674920059, 8.500129682700782, 7.809637818863207, 
2.6204367788888323, 5.110121564907504, 4.403262502265889, 1.2636557164391578], 
"eval_len": [31, 28, 130, 15, 29, 26, 18, 22, 17, 20]}

 91%|█████████ | 909999/1000000 [15:29:10<1:08:26, 21.92it/s]global step 910000, trans_decision ep_re 7.941235449599306

{"global_step": 910000, "eval_re": [5.200265441617308, 4.4660464397400474, 
1.0279829742099684, 10.895594677804956, 1.780895084242034, 7.29162965571116, 
11.317044108077665, 9.45237952318493, 18.604502412876123, 9.376014178528862], 
"eval_len": [21, 21, 20, 19, 17, 17, 33, 32, 35, 26]}

 92%|█████████▏| 919999/1000000 [15:39:30<1:01:17, 21.76it/s]global step 920000, trans_decision ep_re 2.7273853416253315

{"global_step": 920000, "eval_re": [3.162745929959095, 4.213476195102631, 
6.106926973415342, -0.26944067925357607, 2.228253955429965, 9.517352683662782, 
-0.8535981287280829, -1.163290023772569, 1.486373610276854, 2.8450529001608724],
"eval_len": [21, 12, 17, 22, 16, 29, 15, 16, 15, 19]}

 93%|█████████▎| 929999/1000000 [15:49:50<54:04, 21.57it/s]global step 930000, trans_decision ep_re 39.84198770945376

{"global_step": 930000, "eval_re": [2.4097134678592282, 7.842532750604722, 
11.367894900729162, 341.2038400419113, -0.8840853748993103, 15.747015957936648, 
10.093234981482972, 7.9450971199295, -0.4108476241572361, 3.1054808731406665], 
"eval_len": [29, 18, 38, 181, 12, 35, 21, 19, 17, 15]}

 94%|█████████▍| 939998/1000000 [16:00:03<45:44, 21.86it/s]global step 940000, trans_decision ep_re 4.185615696352259

{"global_step": 940000, "eval_re": [2.2990087302405544, 3.099359508020695, 
3.7975583653318994, 2.652026614955921, 2.6190557830048276, 11.904473526317586, 
7.071286393084692, 2.5250268168386083, 1.6817697368051792, 4.20659148892262], 
"eval_len": [15, 29, 16, 30, 22, 23, 22, 23, 16, 14]}

 95%|█████████▍| 949998/1000000 [16:10:40<37:50, 22.02it/s]global step 950000, trans_decision ep_re 8.300877183206703

{"global_step": 950000, "eval_re": [17.436195376279795, 5.134318237574354, 
4.452295091633753, 2.605688400439901, 5.594788765721685, 3.2307309666274158, 
7.90028384739646, 2.821545585154796, 10.702306588982422, 23.13061897225645], 
"eval_len": [26, 18, 17, 17, 34, 24, 21, 22, 28, 52]}

 96%|█████████▌| 959999/1000000 [16:21:00<30:57, 21.53it/s]global step 960000, trans_decision ep_re 7.020666291440554

{"global_step": 960000, "eval_re": [4.852433183129129, -0.6987665138357809, 
1.144321923601561, 0.6969604061652731, 9.979448954421313, 9.627141636611531, 
20.895781531194544, 0.17345376181301686, 21.957414353589968, 
1.5784736777149853], "eval_len": [24, 19, 17, 18, 33, 29, 35, 32, 37, 17]}

 97%|█████████▋| 969998/1000000 [16:31:31<23:23, 21.37it/s]global step 970000, trans_decision ep_re 37.78973526165057

{"global_step": 970000, "eval_re": [7.143195477999254, 3.477788133726548, 
12.955076721897292, 7.693136062119529, 0.912036933647661, 139.58513973013245, 
8.940391873455516, 168.49920740278074, 2.250639516779991, 26.440740763966765], 
"eval_len": [21, 13, 23, 24, 19, 84, 19, 103, 13, 55]}

 98%|█████████▊| 979997/1000000 [16:42:01<15:30, 21.50it/s]global step 980000, trans_decision ep_re 37.853071912542205

{"global_step": 980000, "eval_re": [185.2609284699622, 7.2569636162714035, 
19.93970215020852, 7.907250305819756, 3.20527449704844, 6.030957771711436, 
127.18611199466812, 11.902253140629135, 6.00531986132007, 3.8359573177829995], 
"eval_len": [126, 26, 32, 26, 24, 19, 84, 25, 31, 28]}

 99%|█████████▉| 989998/1000000 [16:52:31<07:40, 21.72it/s]global step 990000, trans_decision ep_re 9.741924872088864

{"global_step": 990000, "eval_re": [19.11889948329112, 9.156788310590338, 
6.757401562035162, 8.39606367630544, 2.078462733142206, 15.84746413272308, 
3.6303706818858483, 5.059306606907108, 20.83572792893382, 6.538763605074529], 
"eval_len": [30, 24, 25, 35, 22, 58, 17, 15, 46, 26]}

100%|█████████▉| 999999/1000000 [17:02:42<00:00, 21.44it/s]global step 1000000, trans_decision ep_re 25.097356341758342

{"global_step": 1000000, "eval_re": [6.056235210989925, 4.316456439972874, 
4.671548913888254, 14.440996114272385, 21.348594731744054, 1.5440372830258648, 
5.3073324761636185, 13.219075396908444, 172.05847947508127, 8.010807375536737], 
"eval_len": [27, 15, 34, 31, 37, 13, 18, 29, 117, 18]}

100%|██████████| 1000000/1000000 [17:02:59<00:00, 16.29it/s]
