
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.15
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [05:11<12:40:22, 21.70it/s]global step 10000, trans_decision ep_re 23.95132605713886

{"global_step": 10000, "eval_re": [80.94749303663323, 9.695507979567612, 
23.85384159675775, 11.737477481458162, 15.72930491550623, 16.946032277358796, 
18.64103813965506, 10.96385289563501, -2.3806207663571306, 53.37933301517389], 
"eval_len": [93, 29, 46, 32, 29, 35, 40, 33, 16, 65]}

  2%|▏         | 19997/1000000 [15:50<12:39:47, 21.50it/s]global step 20000, trans_decision ep_re 9.212388705622564

{"global_step": 20000, "eval_re": [10.506562248715786, 9.641974318752592, 
8.553079606121642, 18.32846386552113, 20.5134763432496, 2.8057313949442575, 
12.301978034877802, 4.263311311558297, 5.403386226818496, -0.19407629433396867],
"eval_len": [31, 32, 21, 36, 35, 29, 26, 20, 18, 18]}

  3%|▎         | 29999/1000000 [26:10<12:28:57, 21.59it/s]global step 30000, trans_decision ep_re 11.19556536022506

{"global_step": 30000, "eval_re": [14.207549414309646, 7.861041739140102, 
13.524878154893605, 5.2084875249309, 13.151411857658982, 11.381424068048524, 
17.26326064047827, 12.63548478972854, 10.45183959758761, 6.270275815474415], 
"eval_len": [27, 32, 33, 35, 30, 28, 38, 33, 24, 30]}

  4%|▍         | 39999/1000000 [36:30<12:17:58, 21.68it/s]global step 40000, trans_decision ep_re 11.449627466676784

{"global_step": 40000, "eval_re": [8.494083243052117, 12.564843936319722, 
4.0386653173238605, 4.824060544130036, 7.3234011235004, 18.966094630953513, 
5.847221324045626, 22.726760836394835, 3.275300758700087, 26.435842952347638], 
"eval_len": [26, 26, 24, 16, 28, 48, 20, 34, 28, 43]}

  5%|▍         | 49999/1000000 [46:41<12:09:06, 21.72it/s]global step 50000, trans_decision ep_re 30.817696006116773

{"global_step": 50000, "eval_re": [15.139616326099365, 8.670018330840247, 
5.273612382397254, 17.337827862194946, 16.689687741247898, 5.052183625038297, 
20.65267264396606, 9.814129859768139, 144.18482196696166, 65.36238932265391], 
"eval_len": [32, 29, 17, 34, 34, 30, 34, 19, 111, 97]}

  6%|▌         | 59999/1000000 [57:20<11:59:34, 21.77it/s]global step 60000, trans_decision ep_re 12.899492018592534

{"global_step": 60000, "eval_re": [18.572987709183618, 5.377800626907275, 
7.464275625462973, 13.088423645027842, 7.06877111954196, 12.809038359103106, 
12.083227773936517, 14.094466798868707, 29.16106715130297, 9.274861376590353], 
"eval_len": [37, 18, 23, 35, 34, 25, 23, 36, 40, 23]}

  7%|▋         | 69999/1000000 [1:07:40<12:05:03, 21.38it/s]global step 70000, trans_decision ep_re 12.04709817308433

{"global_step": 70000, "eval_re": [5.24975485950234, 31.682776116895646, 
5.9081220161759465, 16.323418430743097, 24.08235216052562, 5.082359735635614, 
4.63341597965613, 10.709580799325702, 5.087750123828328, 11.711451508554871], 
"eval_len": [36, 44, 23, 31, 36, 15, 20, 26, 35, 24]}

  8%|▊         | 79998/1000000 [1:18:10<11:45:54, 21.72it/s]global step 80000, trans_decision ep_re 14.588729066721246

{"global_step": 80000, "eval_re": [8.38547012512416, 25.099899646311915, 
23.410556755325967, 2.884859061670114, -0.4070112591457234, 17.652487849955744, 
19.84404758506165, 8.867283064414599, 27.246503690675393, 12.903194147818647], 
"eval_len": [30, 40, 58, 27, 25, 26, 30, 21, 128, 29]}

  9%|▉         | 89999/1000000 [1:28:30<11:47:53, 21.43it/s]global step 90000, trans_decision ep_re 40.17973302002406

{"global_step": 90000, "eval_re": [28.2465648984145, 270.5879426866585, 
25.720315533354224, 11.358138428908154, 5.942844213907847, 3.497423657401584, 
4.481967393007885, 2.2348773612850845, 19.194608119380423, 30.532647907922495], 
"eval_len": [48, 210, 49, 25, 31, 14, 17, 16, 49, 50]}

 10%|▉         | 99999/1000000 [1:38:41<11:34:19, 21.60it/s]global step 100000, trans_decision ep_re 23.533165648143815

{"global_step": 100000, "eval_re": [3.266104205991049, 17.505545676775984, 
142.1723466804262, 19.317495748063415, 2.3227836293022075, 5.668391480336909, 
11.097427666103188, 22.187948187860794, 1.4338667583069462, 10.359746448271455],
"eval_len": [19, 33, 358, 39, 17, 16, 28, 37, 18, 23]}

 11%|█         | 109997/1000000 [1:49:20<11:34:54, 21.35it/s]global step 110000, trans_decision ep_re 51.29216438251849

{"global_step": 110000, "eval_re": [4.559385804917027, 446.0493732636627, 
20.66105184901484, -0.10780406805624777, 8.437643213366094, 6.716819849301424, 
7.268494087600775, 11.833483760568127, 6.345890548372265, 1.1573055164378345], 
"eval_len": [28, 262, 35, 19, 28, 19, 29, 28, 20, 11]}

 12%|█▏        | 119999/1000000 [1:59:40<11:21:08, 21.53it/s]global step 120000, trans_decision ep_re 9.9579087037321

{"global_step": 120000, "eval_re": [-0.19456089660509346, 7.670109805298978, 
4.071646823725746, 38.434347271372594, 8.359176131291445, 19.60203399123398, 
5.864219413543378, 5.484830675408501, 4.543989974070216, 5.7432938479812465], 
"eval_len": [14, 23, 22, 52, 23, 36, 27, 16, 33, 25]}

 13%|█▎        | 129997/1000000 [2:10:10<11:20:43, 21.30it/s]global step 130000, trans_decision ep_re 30.074334369361715

{"global_step": 130000, "eval_re": [10.373898149479974, 6.24703590157618, 
5.567515699173949, 10.987965694555465, 17.344634237907957, 15.442298444142613, 
10.390629045909693, 1.3037922046515016, 207.74073327076195, 15.344841045457887],
"eval_len": [24, 30, 17, 32, 36, 38, 39, 17, 119, 28]}

 14%|█▍        | 139998/1000000 [2:20:40<10:55:10, 21.88it/s]global step 140000, trans_decision ep_re 36.15604219235538

{"global_step": 140000, "eval_re": [13.29313742621474, 173.4646546289345, 
10.091959342657292, 12.343706589112756, 8.12040170914775, 53.33101999206251, 
25.321863732881774, 1.4168180932732397, 56.450541375092996, 7.7263190341762185],
"eval_len": [37, 187, 25, 30, 18, 60, 38, 27, 123, 33]}

 15%|█▍        | 149999/1000000 [2:31:00<11:04:27, 21.32it/s]global step 150000, trans_decision ep_re 42.27308469067583

{"global_step": 150000, "eval_re": [7.619354857508937, 12.36012579545597, 
3.3162335233791973, 10.387882994193424, 21.185597656269582, 21.20992802172382, 
7.284982208635489, 313.0746714027056, 4.255852661042186, 22.036217785844155], 
"eval_len": [18, 34, 16, 33, 34, 34, 16, 186, 16, 36]}

 16%|█▌        | 159997/1000000 [2:41:30<10:53:43, 21.42it/s]global step 160000, trans_decision ep_re 33.793435117864334

{"global_step": 160000, "eval_re": [31.65336502368114, 12.542424735195043, 
16.705893315149936, 0.07395159291010328, 3.6835059017835707, 251.42410440812847,
-0.26191550622559556, 5.97494431364195, 9.006973077371539, 7.131104317007139], 
"eval_len": [54, 32, 36, 27, 16, 153, 15, 18, 34, 23]}

 17%|█▋        | 169998/1000000 [2:52:00<10:41:22, 21.57it/s]global step 170000, trans_decision ep_re 17.76127104933996

{"global_step": 170000, "eval_re": [46.30455137325682, 16.102021920791163, 
8.395516795734697, 19.891898399395313, 11.502183629505025, 16.787151534458363, 
16.78176379869397, 7.172785131943323, 11.724893540485175, 22.949944369135746], 
"eval_len": [55, 33, 27, 32, 36, 27, 44, 24, 26, 47]}

 18%|█▊        | 179999/1000000 [3:02:10<10:36:40, 21.47it/s]global step 180000, trans_decision ep_re 32.71303715641294

{"global_step": 180000, "eval_re": [3.203003373730886, 5.139743538162396, 
18.56330959540051, 219.73170982706839, 10.854891887472668, 14.656136001521045, 
7.971070724976983, 16.05968403428636, 24.5752220500893, 6.375600531420864], 
"eval_len": [21, 23, 44, 155, 27, 31, 32, 34, 36, 26]}

 19%|█▉        | 189999/1000000 [3:12:50<10:28:00, 21.50it/s]global step 190000, trans_decision ep_re 13.934949394743166

{"global_step": 190000, "eval_re": [21.57411402877027, 21.57421671780863, 
30.42151529772767, 12.739792578916747, 8.904389932795446, 13.725267052707938, 
5.835930760860205, 5.2434223969380565, 13.21563283808893, 6.11521234281775], 
"eval_len": [45, 38, 50, 27, 22, 39, 18, 17, 24, 17]}

 20%|█▉        | 199998/1000000 [3:23:20<10:13:52, 21.72it/s]global step 200000, trans_decision ep_re 13.340527963531022

{"global_step": 200000, "eval_re": [11.244935303043894, 17.26766763774206, 
20.878881543538512, 14.85193925403937, 9.92536454263171, 7.144722338930101, 
15.503078710246559, 13.53498229402276, 5.5021288845535565, 17.551579126561684], 
"eval_len": [29, 29, 36, 32, 23, 19, 37, 33, 26, 27]}

 21%|██        | 209999/1000000 [3:33:40<10:07:45, 21.66it/s]global step 210000, trans_decision ep_re 11.54766350185372

{"global_step": 210000, "eval_re": [8.10420269016583, 10.830237539616853, 
5.3251836372073384, 5.078367184901667, 8.121396434708224, 16.720715650557278, 
15.850955350341065, 6.80030838820854, 10.583384140333, 28.06188400249741], 
"eval_len": [24, 28, 19, 16, 22, 34, 30, 18, 31, 49]}

 22%|██▏       | 219997/1000000 [3:44:10<10:02:18, 21.58it/s]global step 220000, trans_decision ep_re 61.99962152992091

{"global_step": 220000, "eval_re": [17.248941737746634, 9.366891796843552, 
7.646106122575132, 9.890528970128505, 8.96753986021811, 514.7545609299071, 
35.695915553274105, 6.676869859766003, 3.941763136845538, 5.807097331904291], 
"eval_len": [32, 20, 20, 21, 23, 265, 72, 25, 13, 35]}

 23%|██▎       | 229999/1000000 [3:54:30<9:52:58, 21.64it/s]global step 230000, trans_decision ep_re 15.040924560719768

{"global_step": 230000, "eval_re": [22.14385923893679, 6.1222743098283425, 
14.69026569498065, 11.051872096268719, 32.89138366245551, 7.510394992761975, 
10.147365926578752, 28.512963087210778, 8.86286522849345, 8.476001369682715], 
"eval_len": [33, 32, 32, 24, 54, 21, 21, 48, 18, 20]}

 24%|██▍       | 239999/1000000 [4:04:41<9:46:42, 21.59it/s]global step 240000, trans_decision ep_re 12.463161508981326

{"global_step": 240000, "eval_re": [3.796657885510294, 9.323242763396703, 
24.624951917079485, 6.7274524140254135, 13.96326033536041, 11.828359287497126, 
5.137968240492123, 18.853972514847005, 22.814947189416845, 7.560802542187857], 
"eval_len": [16, 19, 38, 17, 26, 24, 17, 33, 40, 27]}

 25%|██▍       | 249997/1000000 [4:15:20<9:35:16, 21.73it/s]global step 250000, trans_decision ep_re 56.560472154547895

{"global_step": 250000, "eval_re": [10.864398267692538, 7.239167435784173, 
177.47937397041696, 28.95049282479713, 8.550448336464015, 16.172942305418296, 
5.083309019790692, 277.0943532699669, 12.040593042957713, 22.12964307219047], 
"eval_len": [28, 26, 172, 36, 23, 38, 30, 219, 25, 37]}

 26%|██▌       | 259999/1000000 [4:25:40<9:35:32, 21.43it/s]global step 260000, trans_decision ep_re 29.921321949548844

{"global_step": 260000, "eval_re": [10.53935483418826, 3.0006398474391185, 
6.349002990691747, 8.051528930187743, 9.27380514060331, 22.785025159606093, 
18.189107224440622, 8.739050495168735, 1.6010613583788647, 210.68464351478391], 
"eval_len": [30, 28, 18, 23, 25, 33, 31, 19, 14, 149]}

 27%|██▋       | 269999/1000000 [4:36:00<9:25:20, 21.52it/s]global step 270000, trans_decision ep_re 9.177655375682868

{"global_step": 270000, "eval_re": [5.855910768411052, 8.719569530574722, 
5.101781637941416, 9.404751590231996, 8.100971286015833, 7.85720563917898, 
5.002396464298611, 13.877999148728433, 15.037370213963852, 12.818597477483797], 
"eval_len": [23, 34, 23, 24, 23, 19, 21, 35, 26, 23]}

 28%|██▊       | 279999/1000000 [4:46:10<9:19:28, 21.45it/s]global step 280000, trans_decision ep_re 24.852875334626127

{"global_step": 280000, "eval_re": [20.37913814087619, 6.047229960017253, 
2.2480634369818384, 11.40950315669102, 22.54960469042622, 4.1552277479294375, 
17.92221066073242, 19.532629851498598, 9.745811909586253, 134.53933379152204], 
"eval_len": [33, 19, 23, 24, 36, 17, 27, 32, 24, 90]}

 29%|██▉       | 289999/1000000 [4:56:32<9:07:39, 21.61it/s]global step 290000, trans_decision ep_re 24.334246327994627

{"global_step": 290000, "eval_re": [162.31422649436658, 4.128654234229775, 
0.5198069525154021, 5.551239083282869, 2.115095167677995, 24.660940426021067, 
10.410292106012868, 12.912627400100165, 7.995307595275491, 12.734273820464084], 
"eval_len": [98, 33, 16, 15, 16, 39, 24, 28, 21, 36]}

 30%|██▉       | 299998/1000000 [5:07:10<8:58:51, 21.65it/s]global step 300000, trans_decision ep_re 11.08480308930715

{"global_step": 300000, "eval_re": [15.279449709181684, 4.1523675358494, 
10.27165091756334, 9.361708420179557, 21.463549318736213, 5.595799936710201, 
12.359093628376836, 7.456729528182765, 3.6539240009226, 21.253757897368907], 
"eval_len": [31, 18, 28, 21, 33, 16, 36, 20, 22, 35]}

 31%|███       | 309997/1000000 [5:17:30<8:52:24, 21.60it/s]global step 310000, trans_decision ep_re 72.23067329740347

{"global_step": 310000, "eval_re": [6.487788517605335, 243.1055668988169, 
7.335730604524351, 26.45564019598722, 4.215624265391782, 386.6473858559395, 
19.89316914916778, 9.485517078108183, 6.1665095067835045, 12.513800901710061], 
"eval_len": [26, 163, 23, 52, 15, 289, 30, 23, 25, 36]}

 32%|███▏      | 319999/1000000 [5:27:50<8:45:21, 21.57it/s]global step 320000, trans_decision ep_re 95.05133118861421

{"global_step": 320000, "eval_re": [1.990064050086517, 9.306170695547326, 
6.90432695693876, 6.11143043821431, 2.0724984117737133, 7.139165713058573, 
8.64207970339399, 7.178646060794973, 892.3368165948134, 8.832113261520629], 
"eval_len": [28, 27, 22, 28, 21, 29, 30, 30, 401, 24]}

 33%|███▎      | 329999/1000000 [5:38:02<8:37:17, 21.59it/s]global step 330000, trans_decision ep_re 21.859033011436946

{"global_step": 330000, "eval_re": [128.6997217937249, 4.312366773253307, 
15.894798670723686, 12.242712941260804, 6.006761736889398, 2.3797453616601723, 
11.875430097531193, 14.02204811602588, 13.00718046198069, 10.14956416131947], 
"eval_len": [85, 26, 28, 30, 19, 17, 23, 28, 25, 24]}

 34%|███▍      | 339999/1000000 [5:48:40<8:34:14, 21.39it/s]global step 340000, trans_decision ep_re 52.16074535877683

{"global_step": 340000, "eval_re": [14.993422410361582, 256.94136465681345, 
4.138011034642299, 148.68502713825367, 7.0865566203916766, 9.180955796404394, 
30.448242825779637, 16.61775379154785, 13.825342269709743, 19.6907770438641], 
"eval_len": [31, 110, 17, 242, 21, 25, 49, 29, 24, 34]}

 35%|███▍      | 349998/1000000 [5:59:10<8:19:48, 21.68it/s]global step 350000, trans_decision ep_re 90.78018517382841

{"global_step": 350000, "eval_re": [12.698747278112354, 118.1978582565337, 
6.042003347189183, 9.176520282328484, 6.41051869920026, 577.0100078394091, 
3.376975294944182, 4.291590018219366, 157.12627573789447, 13.471354984452958], 
"eval_len": [41, 113, 26, 23, 27, 328, 17, 18, 104, 32]}

 36%|███▌      | 359999/1000000 [6:09:30<8:16:28, 21.48it/s]global step 360000, trans_decision ep_re 11.390380334573704

{"global_step": 360000, "eval_re": [16.856606354689244, 9.78776009679377, 
7.143587190469865, 2.499852957875553, 4.371753947546066, 9.770639494939, 
30.573670003089184, 8.096479599345189, 17.35889846405229, 7.444555236936898], 
"eval_len": [31, 31, 21, 20, 28, 21, 52, 23, 26, 34]}

 37%|███▋      | 369999/1000000 [6:19:41<8:08:27, 21.50it/s]global step 370000, trans_decision ep_re 9.885493131359821

{"global_step": 370000, "eval_re": [6.821245158225207, 10.47019453113359, 
8.991542017369646, 11.724838127029914, 11.588200266587686, 11.519184505122, 
10.22346408715437, 12.791577792684317, 9.611830360921509, 5.112854467369979], 
"eval_len": [21, 28, 23, 30, 32, 23, 25, 22, 29, 19]}

 38%|███▊      | 379999/1000000 [6:30:20<8:01:46, 21.45it/s]global step 380000, trans_decision ep_re 68.57078195651856

{"global_step": 380000, "eval_re": [470.9831009665159, 6.157255754529916, 
143.77930107068516, 1.152376256678128, 19.047246537499767, 12.901599121226655, 
4.817027113351053, 9.99477874097669, -0.8572022549566616, 17.732336258678835], 
"eval_len": [295, 25, 177, 29, 35, 21, 17, 31, 18, 24]}

 39%|███▉      | 389999/1000000 [6:40:31<7:53:57, 21.45it/s]global step 390000, trans_decision ep_re 10.212219264227079

{"global_step": 390000, "eval_re": [7.511351605613522, 7.680983981127904, 
6.753023873540146, 0.09472421880071663, 7.569815635677653, 15.63862202322423, 
7.426246642089379, 16.064435982785223, 20.39896403564689, 12.984024643765137], 
"eval_len": [20, 36, 26, 11, 19, 25, 23, 30, 30, 26]}

 40%|███▉      | 399999/1000000 [6:50:52<7:41:15, 21.68it/s]global step 400000, trans_decision ep_re 45.82857258527874

{"global_step": 400000, "eval_re": [2.8588501231454244, 10.680598064152381, 
9.048511009875671, 14.549862062832707, 5.833133943147688, 383.98107409669944, 
15.773087362207574, 2.3817828324776493, 8.477040720269056, 4.701785637979881], 
"eval_len": [24, 23, 20, 32, 19, 231, 29, 16, 25, 15]}

 41%|████      | 409997/1000000 [7:01:30<7:39:39, 21.39it/s]global step 410000, trans_decision ep_re 11.111888409282818

{"global_step": 410000, "eval_re": [3.0984376640751274, 18.054377063811764, 
11.618108280117832, 16.51297623775355, 10.620191250495806, 7.759827476948818, 
10.262281909918212, 22.13749221645777, 0.7757702212089899, 10.27942177204033], 
"eval_len": [15, 37, 23, 29, 21, 26, 22, 32, 20, 19]}

 42%|████▏     | 419999/1000000 [7:11:50<7:28:53, 21.53it/s]global step 420000, trans_decision ep_re 30.947891933830693

{"global_step": 420000, "eval_re": [55.02941163734884, 178.84648543311837, 
3.376511840741118, 7.561936055168385, 6.133521578070419, 20.928244847352587, 
7.820154086730699, 11.700475918355139, 12.660102984995206, 5.422074956426146], 
"eval_len": [71, 99, 28, 23, 19, 40, 18, 30, 28, 18]}

 43%|████▎     | 429999/1000000 [7:22:10<7:23:49, 21.40it/s]global step 430000, trans_decision ep_re 10.356059558135026

{"global_step": 430000, "eval_re": [10.723717354878172, 5.238292218899898, 
9.001814444944173, 14.39844558501779, 4.825474506647093, 9.751012492543676, 
6.527026604757918, 21.77822582700076, 10.433530590069473, 10.883055956591308], 
"eval_len": [24, 26, 32, 36, 16, 38, 17, 39, 21, 23]}

 44%|████▍     | 439999/1000000 [7:32:22<7:10:32, 21.68it/s]global step 440000, trans_decision ep_re 50.87847499817179

{"global_step": 440000, "eval_re": [9.748323417652916, 8.004183766042685, 
328.5698511678663, 8.586971162642966, 4.101169775562487, 8.170907917274404, 
86.81063086941265, 34.57967921983976, 7.462498933301178, 12.750533752122571], 
"eval_len": [29, 22, 157, 20, 18, 18, 82, 50, 17, 31]}

 45%|████▍     | 449997/1000000 [7:43:00<7:07:20, 21.45it/s]global step 450000, trans_decision ep_re 60.36684419961428

{"global_step": 450000, "eval_re": [13.246767351389312, 13.047471347100949, 
7.101070224685342, 4.913112758731694, 39.80134442977452, 13.699858647821635, 
19.600577155078923, 471.67175828420665, 12.264026297047744, 8.32245550030595], 
"eval_len": [24, 24, 29, 19, 56, 32, 36, 215, 34, 23]}

 46%|████▌     | 459999/1000000 [7:53:20<6:56:51, 21.59it/s]global step 460000, trans_decision ep_re 12.332558390007396

{"global_step": 460000, "eval_re": [13.627834836196158, 18.829502358590318, 
2.408272259033029, 5.819662265795647, 5.972867730170123, 14.804022391787385, 
13.14076594179036, 10.207224631674745, 28.347496813820158, 10.167934671216049], 
"eval_len": [28, 45, 19, 33, 18, 31, 32, 22, 36, 29]}

 47%|████▋     | 469999/1000000 [8:03:31<6:48:56, 21.60it/s]global step 470000, trans_decision ep_re 12.58187226345594

{"global_step": 470000, "eval_re": [15.34187382355827, 13.148200131150697, 
9.102870104325584, 6.904743646337547, 11.576167842684947, 12.507767475299241, 
7.9993396032839135, 32.62349750634733, 6.451609974170328, 10.162652527401546], 
"eval_len": [26, 33, 26, 20, 32, 24, 23, 45, 20, 20]}

 48%|████▊     | 479998/1000000 [8:14:10<6:40:58, 21.61it/s]global step 480000, trans_decision ep_re 45.998374113897874

{"global_step": 480000, "eval_re": [12.857241851962682, 7.398293252467806, 
5.923213362016509, 18.62265060215647, 3.4465920863108837, 358.958063198373, 
6.067915855787498, 14.60750553705595, 11.082560825767638, 21.01970456708027], 
"eval_len": [27, 22, 25, 32, 19, 194, 19, 31, 26, 31]}

 49%|████▉     | 489999/1000000 [8:24:30<6:33:52, 21.58it/s]global step 490000, trans_decision ep_re 9.375797353803653

{"global_step": 490000, "eval_re": [15.629070018967576, 19.380918760995957, 
5.295271900294353, 13.222031021607014, 6.218249017358209, 3.5569205353656823, 
7.230951521718371, 15.741127254390165, -1.2206758317089963, 8.704109339048209], 
"eval_len": [27, 33, 22, 22, 18, 17, 18, 33, 21, 18]}

 50%|████▉     | 499999/1000000 [8:34:50<6:28:49, 21.43it/s]global step 500000, trans_decision ep_re 50.10535402590766

{"global_step": 500000, "eval_re": [21.793214541929043, 17.943114743132146, 
327.14986986466124, 7.388714561564703, 38.27441551270977, 27.33784214829642, 
12.335926945373606, 8.875510820131005, 39.13806417754541, 0.8168669437332775], 
"eval_len": [39, 31, 180, 21, 53, 35, 29, 37, 50, 19]}

 51%|█████     | 509998/1000000 [8:45:20<6:16:01, 21.72it/s]global step 510000, trans_decision ep_re 28.9968647893352

{"global_step": 510000, "eval_re": [18.139469613107224, 92.54176334507731, 
8.245681123200317, 10.424591702550053, 5.480437008125228, 15.574634681119647, 
116.676053082539, 11.815913998218482, 1.8018597777170988, 9.268243561697645], 
"eval_len": [30, 99, 20, 27, 16, 31, 69, 28, 23, 28]}

 52%|█████▏    | 519999/1000000 [8:55:40<6:15:09, 21.32it/s]global step 520000, trans_decision ep_re 60.452223396005934

{"global_step": 520000, "eval_re": [4.200221444703542, 14.666805375295525, 
1.6580344654575114, 14.503027306492536, 10.65582142353347, 10.508328006649235, 
20.001830483089012, 26.87480719258643, 228.29198976212686, 273.16136850012515], 
"eval_len": [22, 23, 13, 30, 24, 23, 34, 47, 192, 235]}

 53%|█████▎    | 529998/1000000 [9:06:10<5:58:53, 21.83it/s]global step 530000, trans_decision ep_re 9.897582265984655

{"global_step": 530000, "eval_re": [7.802174417787311, 26.340564471565752, 
4.287974771097301, 3.178722540745624, 18.81117113928887, 6.220054300226134, 
4.398546178670478, 3.6139757290419565, 14.485319127601883, 9.837319983821237], 
"eval_len": [22, 52, 22, 21, 45, 23, 31, 13, 26, 22]}

 54%|█████▍    | 539997/1000000 [9:16:30<5:55:23, 21.57it/s]global step 540000, trans_decision ep_re 96.10053909130134

{"global_step": 540000, "eval_re": [836.5446626952414, 11.090736401979354, 
10.27015676493699, 7.265645759964861, 25.39240530888073, 14.697329777571072, 
7.893852426341403, 6.946389780541011, 14.320337792450353, 26.58387420510623], 
"eval_len": [407, 23, 31, 23, 35, 34, 20, 30, 49, 54]}

 55%|█████▍    | 549999/1000000 [9:27:00<5:48:50, 21.50it/s]global step 550000, trans_decision ep_re 85.74478377580468

{"global_step": 550000, "eval_re": [12.278056433478033, 5.453765039320691, 
16.237198123833092, 3.596219061451921, 18.088015247843796, 8.283188071001607, 
23.368146808457976, 251.40887359979868, 9.175618156493385, 509.5587572163675], 
"eval_len": [36, 22, 34, 16, 30, 19, 34, 187, 22, 218]}

 56%|█████▌    | 559997/1000000 [9:37:20<5:41:16, 21.49it/s]global step 560000, trans_decision ep_re 38.90993444846551

{"global_step": 560000, "eval_re": [13.49585081011764, 16.541118036215614, 
10.049330729602904, 8.52729169917148, 10.888333030882853, 2.6862183987174912, 
298.49574332293105, 13.018268518433755, 7.894650325216164, 7.502539613366237], 
"eval_len": [27, 31, 21, 17, 34, 17, 168, 26, 18, 31]}

 57%|█████▋    | 569999/1000000 [9:47:31<5:32:58, 21.52it/s]global step 570000, trans_decision ep_re 37.119296464673205

{"global_step": 570000, "eval_re": [223.4309453629879, 7.62431013315504, 
10.547576798403695, 5.647331388154259, 4.6879964794123685, 6.288313275592177, 
21.30316808851512, 75.57455956496872, 11.132376609677978, 4.956386945864756], 
"eval_len": [122, 18, 20, 26, 17, 18, 33, 239, 26, 31]}

 58%|█████▊    | 579998/1000000 [9:58:10<5:20:45, 21.82it/s]global step 580000, trans_decision ep_re 60.16569181608797

{"global_step": 580000, "eval_re": [10.344724762610868, 10.603658567912351, 
9.867589715877417, 5.770580194370568, 229.34265434454565, 8.544236513305576, 
207.87136682027625, 8.479010996250189, 95.89176893667657, 14.941327309054294], 
"eval_len": [25, 34, 23, 23, 127, 27, 102, 21, 130, 30]}

 59%|█████▉    | 589999/1000000 [10:08:30<5:15:33, 21.65it/s]global step 590000, trans_decision ep_re 147.63920629678196

{"global_step": 590000, "eval_re": [6.69425755394538, 2.7907209469742336, 
7.374840420393169, 799.0155293452119, 9.14387849767502, 11.60786468307048, 
147.3812204394695, 473.33940306942577, 14.121007384939897, 4.923340626714177], 
"eval_len": [19, 15, 16, 337, 21, 21, 230, 181, 33, 27]}

 60%|█████▉    | 599999/1000000 [10:19:00<5:10:59, 21.44it/s]global step 600000, trans_decision ep_re 113.33445560865297

{"global_step": 600000, "eval_re": [6.141263796585641, 13.79853528520022, 
195.29151050462775, 304.0094177872906, 10.741000868815767, 126.32246167365255, 
31.684114086702973, 8.612839364363444, 14.671958476916107, 422.0714542423747], 
"eval_len": [27, 22, 100, 172, 20, 175, 52, 22, 22, 354]}

 61%|██████    | 609999/1000000 [10:29:20<5:00:35, 21.62it/s]global step 610000, trans_decision ep_re 100.1682343909465

{"global_step": 610000, "eval_re": [923.1356746895245, 6.215701371373723, 
3.4751888761791236, 6.508632593924526, 4.543321768322913, 27.279617906546214, 
7.807546119057192, 10.155164807680238, 9.64441903121203, 2.9170767456445468], 
"eval_len": [400, 18, 16, 20, 17, 61, 19, 25, 24, 23]}

 62%|██████▏   | 619999/1000000 [10:39:31<4:53:29, 21.58it/s]global step 620000, trans_decision ep_re 47.04312844374563

{"global_step": 620000, "eval_re": [371.12800522719283, 3.0492879041529015, 
15.13016129617391, 13.913077775023911, 19.556260036821783, 11.668337629738694, 
7.039819285143295, 8.662696891340728, 12.27797327459836, 8.005665117269913], 
"eval_len": [171, 16, 25, 35, 47, 25, 30, 34, 24, 18]}

 63%|██████▎   | 629998/1000000 [10:50:10<4:43:20, 21.76it/s]global step 630000, trans_decision ep_re 57.00321147729189

{"global_step": 630000, "eval_re": [9.839462858825286, 184.0150025571553, 
11.190869002076205, 14.60429409979983, 4.852731970462164, 10.187327400968437, 
12.279964407618312, 300.5310924530381, 18.82073226900427, 3.7106377539710342], 
"eval_len": [34, 154, 28, 31, 15, 33, 24, 151, 45, 14]}

 64%|██████▍   | 639999/1000000 [11:00:30<4:37:17, 21.64it/s]global step 640000, trans_decision ep_re 12.917274654297746

{"global_step": 640000, "eval_re": [12.870904434036564, 6.802283063747306, 
12.19209750979164, 12.825453162234776, 20.319438009797068, 17.703845861989652, 
10.352141617930078, 15.074436459978719, 18.15018438867038, 2.8819620348012562], 
"eval_len": [35, 27, 27, 32, 51, 47, 22, 33, 32, 19]}

 65%|██████▍   | 649999/1000000 [11:10:40<4:30:54, 21.53it/s]global step 650000, trans_decision ep_re 77.18208082231817

{"global_step": 650000, "eval_re": [31.498288821820204, 217.86402194971376, 
432.2573834124558, 7.799142379939686, 6.3061492333343345, 24.939356082092978, 
12.986520579028827, 25.702454185633492, 8.072776441649173, 4.3947151375135025], 
"eval_len": [48, 139, 241, 27, 22, 44, 27, 45, 21, 15]}

 66%|██████▌   | 659998/1000000 [11:21:20<4:19:42, 21.82it/s]global step 660000, trans_decision ep_re 111.98476994197043

{"global_step": 660000, "eval_re": [17.914863342172403, 9.793282466114086, 
5.3566009378186035, 305.79844937034176, 701.2971575629321, 38.14924124426008, 
19.56470178740805, 8.327155460098393, 4.164962298718347, 9.481284949840523], 
"eval_len": [32, 20, 20, 196, 276, 50, 48, 23, 29, 22]}

 67%|██████▋   | 669999/1000000 [11:31:40<4:17:28, 21.36it/s]global step 670000, trans_decision ep_re 15.535249135333363

{"global_step": 670000, "eval_re": [4.079713957086307, 11.3218478483006, 
1.8156867883960779, 43.67205577867449, 3.9394429899633976, 55.08580073115354, 
10.836161026939381, 8.569559177277135, 8.92150929570419, 7.110713759838487], 
"eval_len": [20, 22, 19, 52, 27, 54, 35, 21, 20, 21]}

 68%|██████▊   | 679999/1000000 [11:41:51<4:10:20, 21.30it/s]global step 680000, trans_decision ep_re 47.0482796674702

{"global_step": 680000, "eval_re": [2.2926713252567374, 5.706787174570345, 
4.74258497174968, 12.43258220419161, 16.932692120451513, 7.894158278270587, 
11.624516390668427, 398.8689093707249, 5.2656450776288395, 4.722249761189368], 
"eval_len": [18, 23, 25, 28, 40, 27, 26, 278, 30, 15]}

 69%|██████▉   | 689998/1000000 [11:52:30<3:58:48, 21.64it/s]global step 690000, trans_decision ep_re 15.045750421389736

{"global_step": 690000, "eval_re": [7.43167880648827, 15.865227599293341, 
10.755279882362599, 6.311976017542449, 43.24460279130123, 10.38364567658968, 
14.96846610285162, 3.2330610522134764, 6.106072529767202, 32.15749375548748], 
"eval_len": [21, 27, 30, 18, 55, 20, 33, 15, 18, 54]}

 70%|██████▉   | 699999/1000000 [12:02:50<3:51:02, 21.64it/s]global step 700000, trans_decision ep_re 37.43675619529435

{"global_step": 700000, "eval_re": [8.720811482970847, 167.02790931273296, 
7.67365797515646, 2.8420087435478165, 9.856503719658743, 3.9915900537104863, 
1.8122232635997642, 5.438312275935243, 9.139113623797849, 157.86543150183334], 
"eval_len": [23, 123, 26, 17, 21, 15, 26, 20, 34, 103]}

 71%|███████   | 709999/1000000 [12:13:00<3:45:19, 21.45it/s]global step 710000, trans_decision ep_re 25.741658385530865

{"global_step": 710000, "eval_re": [7.193825005667565, 127.14787959386727, 
12.179857412892378, 4.50178261187176, 7.543821929622773, 11.019021803647014, 
13.6531358647512, 14.904902786592372, 45.52509264747273, 13.747264198923597], 
"eval_len": [17, 111, 32, 14, 25, 34, 27, 34, 80, 27]}

 72%|███████▏  | 719998/1000000 [12:23:40<3:36:08, 21.59it/s]global step 720000, trans_decision ep_re 71.01421762019366

{"global_step": 720000, "eval_re": [7.07114997902559, 3.5581489077399824, 
20.474237579944344, 25.46793280171297, 8.517346283507978, 2.3070823284452935, 
6.387999637769243, 424.93545796813066, 20.08205967826619, 191.3407610373944], 
"eval_len": [20, 29, 35, 48, 46, 15, 28, 170, 49, 105]}

 73%|███████▎  | 729999/1000000 [12:34:00<3:29:48, 21.45it/s]global step 730000, trans_decision ep_re 138.55921732505482

{"global_step": 730000, "eval_re": [119.06103301208927, 581.6394349694431, 
15.07250711230511, 9.324851197428481, 374.63648465772906, 258.2096455638035, 
4.488175334866799, 6.422326063302874, 7.383240976937729, 9.354474362642128], 
"eval_len": [138, 252, 37, 33, 184, 239, 17, 33, 18, 35]}

 74%|███████▍  | 739997/1000000 [12:44:30<3:21:31, 21.50it/s]global step 740000, trans_decision ep_re 59.69207428126001

{"global_step": 740000, "eval_re": [9.584141709196098, 35.122635392719836, 
7.559647843523661, 5.180270884662597, 10.780769911977933, 27.581159581138124, 
29.36983008580913, 446.898887226041, 16.609400148478475, 8.234000029053252], 
"eval_len": [25, 46, 32, 18, 22, 47, 50, 252, 25, 19]}

 75%|███████▍  | 749999/1000000 [12:54:41<3:14:54, 21.38it/s]global step 750000, trans_decision ep_re 12.311458112719638

{"global_step": 750000, "eval_re": [13.385666585439749, 20.087737946154892, 
16.71052475280908, 6.933454875761382, 11.592549038969334, 4.917987492976174, 
4.040906782412836, 13.136249147321255, 11.647421350710378, 20.662083154641298], 
"eval_len": [24, 38, 35, 22, 24, 27, 16, 29, 21, 35]}

 76%|███████▌  | 759997/1000000 [13:05:20<3:05:42, 21.54it/s]global step 760000, trans_decision ep_re 26.188970005757955

{"global_step": 760000, "eval_re": [19.0680763062411, 142.5874507108013, 
13.419565304616196, 15.117848263288732, 10.807214545785861, 6.7668509313653145, 
12.326998979034226, 9.531160649588317, 10.49326559079436, 21.7712687760641], 
"eval_len": [31, 110, 25, 24, 28, 28, 29, 21, 24, 36]}

 77%|███████▋  | 769999/1000000 [13:15:40<3:00:00, 21.29it/s]global step 770000, trans_decision ep_re 12.456115727591719

{"global_step": 770000, "eval_re": [17.980171168415264, 3.7780400962126826, 
16.640433205422763, 3.0476622444610175, 13.05588701098086, 3.441693482073659, 
9.021638293640326, 5.845141018975099, 4.409903505301688, 47.340587250433835], 
"eval_len": [32, 14, 29, 19, 29, 16, 21, 25, 15, 51]}

 78%|███████▊  | 779999/1000000 [13:25:52<2:49:46, 21.60it/s]global step 780000, trans_decision ep_re 13.102698320982856

{"global_step": 780000, "eval_re": [6.820245186647468, 9.932795429346704, 
13.799623498464419, 26.582198524341116, 8.350490812063603, 23.23165906825187, 
6.400918203848805, 13.336277041555737, 9.063649564760155, 13.509125880548675], 
"eval_len": [18, 29, 29, 50, 24, 51, 19, 52, 29, 24]}

 79%|███████▉  | 789999/1000000 [13:36:30<2:42:27, 21.54it/s]global step 790000, trans_decision ep_re 89.93211648652034

{"global_step": 790000, "eval_re": [200.3000283294851, 70.87758240803898, 
63.34022916525354, 6.183619172664326, 18.350177455339175, 7.806144364361077, 
486.39604531034075, 15.249354260470822, 21.371227904025112, 9.446756495224625], 
"eval_len": [115, 69, 98, 32, 34, 20, 287, 34, 35, 26]}

 80%|███████▉  | 799999/1000000 [13:46:50<2:33:04, 21.78it/s]global step 800000, trans_decision ep_re 9.225777109590835

{"global_step": 800000, "eval_re": [0.11325560866415393, 32.5508202153713, 
4.948309909400002, 2.1060966818833684, 18.298534535533285, 4.41310423528699, 
10.476073471626442, 5.605933201557694, 7.12293583435223, 6.622707402232872], 
"eval_len": [30, 49, 19, 17, 27, 21, 25, 18, 26, 18]}

 81%|████████  | 809999/1000000 [13:57:10<2:25:06, 21.82it/s]global step 810000, trans_decision ep_re 30.662046336072798

{"global_step": 810000, "eval_re": [5.70234807593307, 6.335366049941312, 
13.478885622744945, 2.4496141824503996, 1.1496795294207427, 12.284933712000388, 
0.12127337414892259, 6.804457014534569, 256.53496677199234, 1.7589390275612409],
"eval_len": [18, 16, 29, 27, 28, 99, 14, 20, 221, 17]}

 82%|████████▏ | 819999/1000000 [14:07:30<2:18:36, 21.64it/s]global step 820000, trans_decision ep_re 27.31671089341142

{"global_step": 820000, "eval_re": [7.1713864014143125, 15.43104961156175, 
108.14913618843426, 39.59695915589052, 40.71060607937874, 15.661895816664641, 
15.190356330424182, 12.397709973693281, 10.756322429843527, 8.101686946808945], 
"eval_len": [21, 31, 90, 54, 49, 26, 39, 35, 30, 17]}

 83%|████████▎ | 829999/1000000 [14:17:50<2:09:42, 21.84it/s]global step 830000, trans_decision ep_re 27.668265747222808

{"global_step": 830000, "eval_re": [3.14112013564499, 23.932919054883627, 
4.588613509982246, 12.918770333746775, 6.371859176285069, 17.30455527609917, 
7.886902340621097, 184.6367216330782, 14.820540356326141, 1.080655655560731], 
"eval_len": [17, 50, 23, 32, 25, 34, 28, 102, 44, 17]}

 84%|████████▍ | 839998/1000000 [14:28:10<2:02:38, 21.74it/s]global step 840000, trans_decision ep_re 18.106482164719687

{"global_step": 840000, "eval_re": [12.443687780826352, 5.269776551499346, 
39.474912147694965, 29.788996642438253, 29.45670406119079, 10.125976498350767, 
7.865567676722635, 8.449129986188076, 16.195003569483962, 21.995066732801746], 
"eval_len": [30, 23, 52, 50, 48, 29, 25, 21, 34, 33]}

 85%|████████▍ | 849999/1000000 [14:38:12<1:55:23, 21.67it/s]global step 850000, trans_decision ep_re 62.43944706374815

{"global_step": 850000, "eval_re": [8.707949541368292, 4.29111883340639, 
9.692824615277617, 3.4681730276478833, 19.656896753405345, 15.286134909078251, 
10.295481605758345, 6.849169769317993, 436.2999035502983, 109.84681803192302], 
"eval_len": [21, 20, 23, 19, 32, 30, 21, 16, 264, 140]}

 86%|████████▌ | 859999/1000000 [14:48:31<1:47:29, 21.71it/s]global step 860000, trans_decision ep_re 51.9130462863218

{"global_step": 860000, "eval_re": [21.131747943347246, 14.196918109040434, 
20.685775995704827, 239.55793883292995, 3.336174146682977, 20.06354547574331, 
174.3555756368383, 10.43206608910976, 3.967195113188321, 11.403525520632908], 
"eval_len": [32, 21, 47, 213, 32, 31, 101, 25, 27, 27]}

 87%|████████▋ | 869999/1000000 [14:58:52<1:39:54, 21.69it/s]global step 870000, trans_decision ep_re 58.88472916043596

{"global_step": 870000, "eval_re": [16.76490193964936, 12.633081528373417, 
8.804314220973016, 10.888155480864242, 16.19695369300028, 11.725592574677009, 
15.070298138234838, 9.960877927317476, 472.90648294664203, 13.896633154627935], 
"eval_len": [48, 21, 22, 28, 31, 22, 33, 40, 206, 28]}

 88%|████████▊ | 879998/1000000 [15:09:31<1:31:40, 21.82it/s]global step 880000, trans_decision ep_re 17.014372809983865

{"global_step": 880000, "eval_re": [13.812717340674054, 14.026796033711468, 
18.756862970608363, 11.748160385571529, 11.860487080293193, 18.24498984749557, 
26.97174449594589, 39.22939075086277, 15.606786249473094, -0.11420705479726435],
"eval_len": [32, 26, 31, 24, 36, 42, 36, 50, 26, 29]}

 89%|████████▉ | 889998/1000000 [15:19:51<1:24:08, 21.79it/s]global step 890000, trans_decision ep_re 40.012400927383986

{"global_step": 890000, "eval_re": [9.980785824708692, 8.971351420476086, 
336.5932099448892, 3.8804437376112024, 6.938332262534117, 10.592183586141445, 
11.079177088013918, 6.841216981523453, 2.05102353992159, 3.1962848880202275], 
"eval_len": [34, 21, 168, 19, 17, 25, 19, 19, 19, 14]}

 90%|████████▉ | 899997/1000000 [15:30:11<1:17:44, 21.44it/s]global step 900000, trans_decision ep_re 115.1480349599564

{"global_step": 900000, "eval_re": [6.117383573611491, 1049.5378067680456, 
12.239207139956706, 4.736412561642113, 9.138325950381475, 13.744012818904782, 
2.743724449107928, 1.1926799492179634, 6.088783835664345, 45.94201255303166], 
"eval_len": [17, 367, 23, 14, 36, 26, 24, 13, 32, 71]}

 91%|█████████ | 909999/1000000 [15:40:31<1:09:34, 21.56it/s]global step 910000, trans_decision ep_re 126.8585351929305

{"global_step": 910000, "eval_re": [3.5257031132779484, 10.483161001370492, 
249.66919055316765, 10.448973733097604, 28.71748507206549, 604.6207303938926, 
6.235960323907669, 332.01131656496773, 13.872787288745046, 9.000043884812879], 
"eval_len": [17, 24, 169, 34, 57, 306, 29, 170, 23, 21]}

 92%|█████████▏| 919999/1000000 [15:50:42<1:02:11, 21.44it/s]global step 920000, trans_decision ep_re 88.40877214651007

{"global_step": 920000, "eval_re": [10.644354584372497, 15.143225814242934, 
4.946585263803707, 6.240137702531776, 380.2954100783672, 4.2876981378089924, 
4.166447161175233, 5.974585830422704, 18.706664733020045, 433.68261215935576], 
"eval_len": [30, 33, 28, 36, 176, 15, 21, 17, 35, 187]}

 93%|█████████▎| 929999/1000000 [16:01:21<54:30, 21.40it/s]global step 930000, trans_decision ep_re 90.85067436495713

{"global_step": 930000, "eval_re": [4.674390552908344, 9.052868492816899, 
5.054048832744489, 606.3936428012618, 253.67352817621452, 2.629640782521978, 
0.11170901407276893, 17.375629873743975, 6.154732073088144, 3.386553050198456], 
"eval_len": [22, 28, 22, 344, 149, 31, 19, 32, 17, 20]}

 94%|█████████▍| 939998/1000000 [16:11:51<46:18, 21.60it/s]global step 940000, trans_decision ep_re 79.09282857751852

{"global_step": 940000, "eval_re": [7.401546787360431, 445.9266691192744, 
5.611105158346387, 3.8666629910196573, 17.252460316003283, 7.166212166344142, 
16.918049917523998, 273.49238495278735, 5.1313819612026625, 8.161812405322829], 
"eval_len": [27, 247, 30, 25, 41, 34, 47, 146, 19, 21]}

 95%|█████████▍| 949999/1000000 [16:22:01<38:50, 21.45it/s]global step 950000, trans_decision ep_re 8.787078144578166

{"global_step": 950000, "eval_re": [10.951095275199757, 11.378013816451514, 
17.313469946325263, 7.158164093272261, 8.27506221880654, 13.241018007378026, 
8.310047353889853, 4.526451560392857, 0.723933803318145, 5.993525370747456], 
"eval_len": [33, 23, 36, 20, 27, 27, 23, 16, 26, 16]}

 96%|█████████▌| 959998/1000000 [16:32:41<30:33, 21.82it/s]global step 960000, trans_decision ep_re 114.25803470846411

{"global_step": 960000, "eval_re": [7.279499264971324, 809.4547973951114, 
5.310772906226879, 8.738479260193161, 218.72471311986047, 3.069863275781716, 
7.389705445004572, 17.158440632194853, 58.563070163785355, 6.89100562151131], 
"eval_len": [18, 323, 15, 29, 150, 21, 17, 32, 65, 18]}

 97%|█████████▋| 969999/1000000 [16:43:01<23:02, 21.70it/s]global step 970000, trans_decision ep_re 74.20776467668368

{"global_step": 970000, "eval_re": [5.190912132013281, 640.5335296653798, 
11.053256718716911, 4.077311123584652, 7.6114158739714615, 6.355331648924369, 
5.032869597227004, 4.657283508441642, 17.764038396181626, 39.80169810239606], 
"eval_len": [31, 252, 23, 28, 23, 29, 26, 18, 29, 54]}

 98%|█████████▊| 979999/1000000 [16:53:21<15:27, 21.56it/s]global step 980000, trans_decision ep_re 39.60144762169311

{"global_step": 980000, "eval_re": [15.364058791765002, 161.7443210275394, 
11.750042561358827, 11.72272359898684, 5.241899937281986, 4.915923103348812, 
9.444052815263783, 162.5534960291195, 5.991266293917277, 7.286692058349792], 
"eval_len": [30, 133, 27, 22, 19, 29, 19, 149, 21, 19]}

 99%|█████████▉| 989999/1000000 [17:03:41<07:39, 21.78it/s]global step 990000, trans_decision ep_re 56.60165320237913

{"global_step": 990000, "eval_re": [8.666764452488287, 9.350408656345286, 
191.1052818730913, 2.7765211070309634, 3.7338033836323894, 11.89365543675316, 
14.172084017422899, 39.958998470413555, 277.2547401837882, 7.104274442825256], 
"eval_len": [21, 22, 194, 27, 17, 30, 25, 99, 164, 20]}

100%|█████████▉| 999997/1000000 [17:14:01<00:00, 21.60it/s]global step 1000000, trans_decision ep_re 95.01931361660549

{"global_step": 1000000, "eval_re": [6.628272757491913, 10.18029556236191, 
7.939590406985698, 627.5063918590153, 15.602419980494634, 172.6526096213433, 
34.792851942800105, 56.07564918380588, 10.510236814037233, 8.30481803771891], 
"eval_len": [19, 25, 19, 254, 32, 176, 87, 103, 20, 30]}

100%|██████████| 1000000/1000000 [17:14:04<00:00, 16.12it/s]
