import sys
sys.path.insert(0, "../utils")
import sklearn.datasets as skds
from sklearn.preprocessing import QuantileTransformer, KBinsDiscretizer, OrdinalEncoder, LabelEncoder, StandardScaler
import numpy as np
import pandas as pd
from transformation import BSplineTransformer, spline_transform_dataset
from trainers import FFMTrainer, FMTrainer
import math
import optuna
import optuna.samplers
from typing import Callable
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset
from tqdm import trange
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)
cuda:0
torch.manual_seed(42)
np.random.seed(42)
# 1 target column plus 12 timbre-average (TA) and 78 timbre-covariance (TC) features.
column_names = ['Year'] + [f'TA{i:02d}' for i in range(1, 13)] + [f'TC{i:02d}' for i in range(1, 79)]
raw_df = pd.read_csv("../data/Ye_millionsongdataset/Training_set_songs.csv",
                     names=column_names,
                     dtype={0: int, **{i: float for i in range(1, 91)}},
                     na_values="?", skiprows=1)
raw_df.sample(6)
| | Year | TA01 | TA02 | TA03 | TA04 | TA05 | TA06 | TA07 | TA08 | TA09 | ... | TC69 | TC70 | TC71 | TC72 | TC73 | TC74 | TC75 | TC76 | TC77 | TC78 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 332595 | 2004 | 51.19589 | 18.54077 | 12.55200 | 3.25000 | -7.79542 | -19.63461 | 0.27662 | -1.03994 | -1.57231 | ... | -16.43458 | -37.05054 | -100.93186 | 49.26529 | -1.47467 | -39.25996 | -22.44389 | -16.37939 | -49.67664 | 3.87491 |
| 230573 | 1989 | 51.44573 | 53.42621 | 35.83483 | -16.72867 | -48.13185 | -13.04248 | -45.74081 | -6.18791 | 16.60100 | ... | 9.94220 | -71.82280 | 59.40250 | 45.14201 | -2.72343 | 35.62292 | 37.99939 | 7.04862 | 21.08678 | 2.07779 |
| 364530 | 1987 | 44.85215 | 33.51052 | 27.44631 | 24.99783 | -15.02508 | 12.29223 | 10.57365 | 7.06412 | -1.29649 | ... | 5.05131 | 56.78609 | 136.98499 | -30.38374 | 24.51034 | 62.77834 | 44.80304 | 30.34866 | 105.17991 | 2.58183 |
| 82857 | 2002 | 50.25653 | 59.83236 | 37.80210 | -0.57762 | -1.64064 | -11.77884 | -2.25574 | -4.17007 | 4.05922 | ... | -4.12555 | 21.34620 | 29.72172 | 56.71419 | 2.61590 | 56.47152 | -26.05716 | -0.77059 | 29.40943 | -0.02311 |
| 108108 | 1971 | 45.48775 | -5.49790 | 9.56187 | 9.36977 | -11.15726 | -3.63341 | 11.00297 | -6.36722 | 5.37455 | ... | -29.83405 | 228.02525 | 53.68190 | -47.38609 | 14.62809 | 124.90797 | -26.61476 | 5.08838 | 295.42035 | 19.74883 |
| 446568 | 2005 | 49.59492 | 35.40110 | -8.11273 | -13.40502 | 3.18931 | -14.05923 | 3.04436 | -1.43375 | 3.60354 | ... | -0.40689 | -91.80271 | -29.51019 | -6.40710 | 6.99983 | 81.51023 | -83.44656 | 2.81049 | 24.80947 | -10.32877 |
6 rows × 91 columns
raw_df.shape
(463715, 91)
raw_df.columns
Index(['Year', 'TA01', 'TA02', 'TA03', 'TA04', 'TA05', 'TA06', 'TA07', 'TA08', 'TA09', 'TA10', 'TA11', 'TA12', 'TC01', 'TC02', 'TC03', 'TC04', 'TC05', 'TC06', 'TC07', 'TC08', 'TC09', 'TC10', 'TC11', 'TC12', 'TC13', 'TC14', 'TC15', 'TC16', 'TC17', 'TC18', 'TC19', 'TC20', 'TC21', 'TC22', 'TC23', 'TC24', 'TC25', 'TC26', 'TC27', 'TC28', 'TC29', 'TC30', 'TC31', 'TC32', 'TC33', 'TC34', 'TC35', 'TC36', 'TC37', 'TC38', 'TC39', 'TC40', 'TC41', 'TC42', 'TC43', 'TC44', 'TC45', 'TC46', 'TC47', 'TC48', 'TC49', 'TC50', 'TC51', 'TC52', 'TC53', 'TC54', 'TC55', 'TC56', 'TC57', 'TC58', 'TC59', 'TC60', 'TC61', 'TC62', 'TC63', 'TC64', 'TC65', 'TC66', 'TC67', 'TC68', 'TC69', 'TC70', 'TC71', 'TC72', 'TC73', 'TC74', 'TC75', 'TC76', 'TC77', 'TC78'], dtype='object')
train, test = train_test_split(raw_df, test_size=0.2, random_state=42)
tr_feats = train.drop("Year", axis=1)
tr_target = train["Year"].values
te_feats = test.drop("Year", axis=1)
te_target = test["Year"].values
target_scaler = StandardScaler()
tr_target = target_scaler.fit_transform(tr_target.reshape(-1, 1)).reshape(-1)
te_target = target_scaler.transform(te_target.reshape(-1, 1)).reshape(-1)
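Standardizing the target means every loss reported below is in units of the Year column's standard deviation rather than years. A minimal helper (not part of the original notebook; it assumes the target_scaler fitted above) to map a standardized RMSE back to years:

def rmse_in_years(standardized_rmse: float) -> float:
    # StandardScaler stores the fitted per-feature standard deviation in scale_.
    return standardized_rmse * float(target_scaler.scale_[0])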
quant_transform = QuantileTransformer(output_distribution='uniform',
                                      n_quantiles=10000,
                                      subsample=len(tr_feats),
                                      random_state=42)
X_train_qs = quant_transform.fit_transform(tr_feats)
X_test_qs = quant_transform.transform(te_feats)
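The quantile transform maps each feature through its empirical CDF, so every column of X_train_qs lands in [0, 1] with an approximately uniform distribution, which is exactly the domain the B-spline basis below is defined on. A quick illustrative check (not a cell from the original run):

# All transformed training values should lie in [0, 1] ...
assert X_train_qs.min() >= 0.0 and X_train_qs.max() <= 1.0
# ... and each column's quartiles should sit near 0.25 / 0.5 / 0.75.
print(np.percentile(X_train_qs[:, 0], [25, 50, 75]))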
def train_spline_fm(embedding_dim: int, step_size: float, batch_size: int, num_knots: int, num_epochs: int,
                    callback: Callable[[int, float], None] = None):
    bs = BSplineTransformer(num_knots, 3)  # cubic B-spline basis over the quantile-transformed [0, 1] range
    # Spline-encode each split into sparse (indices, weights, offsets, fields) arrays:
    # every feature value activates a few basis functions with fractional weights.
    tr_indices, tr_weights, tr_offsets, tr_fields = spline_transform_dataset(X_train_qs, bs)
    te_indices, te_weights, te_offsets, te_fields = spline_transform_dataset(X_test_qs, bs)
    num_fields = X_train_qs.shape[1]
    num_embeddings = int(max(np.max(tr_indices), np.max(te_indices)) + 1)
    train_ds = TensorDataset(
        torch.tensor(tr_indices, dtype=torch.int64),
        torch.tensor(tr_weights, dtype=torch.float32),
        torch.tensor(tr_offsets, dtype=torch.int64),
        torch.tensor(tr_fields, dtype=torch.int64),
        torch.tensor(tr_target, dtype=torch.float32))
    test_ds = TensorDataset(
        torch.tensor(te_indices, dtype=torch.int64),
        torch.tensor(te_weights, dtype=torch.float32),
        torch.tensor(te_offsets, dtype=torch.int64),
        torch.tensor(te_fields, dtype=torch.int64),
        torch.tensor(te_target, dtype=torch.float32))
    trainer = FMTrainer(embedding_dim, step_size, batch_size, num_epochs, callback)
    return trainer.train(num_fields, num_embeddings, train_ds, test_ds, torch.nn.MSELoss(), device)
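For intuition about what spline_transform_dataset emits: a cubic B-spline basis is locally supported, so each input in [0, 1] activates at most degree + 1 = 4 basis functions, and those weights sum to 1 (a partition of unity). The sketch below demonstrates this property with scipy; the knot placement is an assumption, and the project's own BSplineTransformer in ../utils/transformation.py may differ in details.

import numpy as np
from scipy.interpolate import BSpline

degree, num_knots = 3, 10
# Clamped uniform knot vector on [0, 1] (assumed; boundary knots repeated `degree` times).
knots = np.concatenate([[0.0] * degree, np.linspace(0.0, 1.0, num_knots), [1.0] * degree])
num_basis = len(knots) - degree - 1
x = 0.37
# Evaluate every basis element at x; with extrapolate=False a BSpline
# returns NaN outside its support, which we zero out.
weights = np.nan_to_num(np.array([
    BSpline.basis_element(knots[i:i + degree + 2], extrapolate=False)(x)
    for i in range(num_basis)
]))
active = np.nonzero(weights)[0]
print(active, weights[active], weights.sum())  # at most 4 active indices; weights sum to 1.0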
def train_spline_objective(trial: optuna.Trial):
    embedding_dim = trial.suggest_int('embedding_dim', 1, 10)
    step_size = trial.suggest_float('step_size', 1e-2, 0.5, log=True)
    batch_size = trial.suggest_int('batch_size', 32, 256)
    num_knots = trial.suggest_int('num_knots', 3, 48)
    num_epochs = trial.suggest_int('num_epochs', 5, 15)
    def callback(epoch: int, loss: float):
        trial.report(math.sqrt(loss), epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()
    return math.sqrt(train_spline_fm(embedding_dim, step_size, batch_size, num_knots, num_epochs,
                                     callback=callback))
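A note on the pruning mechanics: the callback reports the intermediate per-epoch RMSE via trial.report, and trial.should_prune lets Optuna abandon unpromising trials early. Since create_study below is given no explicit pruner, Optuna falls back to its default MedianPruner, which is what produces the "Trial ... pruned." lines in the log. Spelled out explicitly (an equivalent sketch, not a change to the run):

# Equivalent to the create_study call below: MedianPruner stops a trial whose
# intermediate value is worse than the median of completed trials at the same step.
study_explicit = optuna.create_study(study_name='splines',
                                     direction='minimize',
                                     sampler=optuna.samplers.TPESampler(seed=42),
                                     pruner=optuna.pruners.MedianPruner())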
study = optuna.create_study(study_name='splines',
                            direction='minimize',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(train_spline_objective, n_trials=100)
[I 2023-05-17 08:19:57,777] A new study created in memory with name: splines
[I 2023-05-17 08:23:37,582] Trial 0 finished with value: 1.2386136255730438 and parameters: {'embedding_dim': 4, 'step_size': 0.4123206532618726, 'batch_size': 196, 'num_knots': 30, 'num_epochs': 6}. Best is trial 0 with value: 1.2386136255730438.
[I 2023-05-17 08:29:49,023] Trial 1 finished with value: 9.578139736045964 and parameters: {'embedding_dim': 2, 'step_size': 0.012551115172973842, 'batch_size': 226, 'num_knots': 30, 'num_epochs': 12}. Best is trial 0 with value: 1.2386136255730438.
[I 2023-05-17 08:33:39,931] Trial 2 finished with value: 1.4629122193885566 and parameters: {'embedding_dim': 1, 'step_size': 0.44447541666908114, 'batch_size': 219, 'num_knots': 12, 'num_epochs': 7}. Best is trial 0 with value: 1.2386136255730438.
[I 2023-05-17 08:38:55,191] Trial 3 finished with value: 5.51720045029323 and parameters: {'embedding_dim': 2, 'step_size': 0.0328774741399112, 'batch_size': 150, 'num_knots': 22, 'num_epochs': 8}. Best is trial 0 with value: 1.2386136255730438.
[I 2023-05-17 08:47:13,068] Trial 4 finished with value: 2.7195761291430305 and parameters: {'embedding_dim': 7, 'step_size': 0.017258215396625, 'batch_size': 97, 'num_knots': 19, 'num_epochs': 10}. Best is trial 0 with value: 1.2386136255730438.
[I 2023-05-17 08:48:27,477] Trial 5 pruned.
[I 2023-05-17 08:49:45,729] Trial 6 pruned.
[I 2023-05-17 08:55:03,136] Trial 7 finished with value: 1.3228858834036774 and parameters: {'embedding_dim': 9, 'step_size': 0.032925293631105246, 'batch_size': 53, 'num_knots': 34, 'num_epochs': 9}. Best is trial 0 with value: 1.2386136255730438.
[W 2023-05-17 08:55:23,673] Trial 8 failed with parameters: {'embedding_dim': 2, 'step_size': 0.06938901412739397, 'batch_size': 39, 'num_knots': 44, 'num_epochs': 7} because of the following error: KeyboardInterrupt().
[W 2023-05-17 08:55:23,676] Trial 8 failed with value None.
(KeyboardInterrupt traceback elided: the search was interrupted manually during trial 8, inside BSplineTransformer.__call__ via spline_transform_dataset, so study.optimize stopped with only trials 0-7 completed.)
trial = study.best_trial
print('Test loss: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))
study.best_params
train_spline_fm(**study.best_params)
def train_bin_fm(embedding_dim: int, step_size: float, batch_size: int,
                 num_bins: int, bin_strategy: str, num_epochs: int,
                 callback: Callable[[int, float], None] = None):
    num_fields = tr_feats.shape[1]
    num_embeddings = num_fields * num_bins
    # Shift each field's bin ids into a disjoint range so a single embedding
    # table of num_fields * num_bins rows serves all fields.
    index_offsets = np.arange(0, num_fields) * num_bins
    discretizer = KBinsDiscretizer(num_bins, encode='ordinal', strategy=bin_strategy, random_state=42)
    discretizer.fit(tr_feats)
    tr_indices = discretizer.transform(tr_feats)
    tr_indices += np.tile(index_offsets, (tr_indices.shape[0], 1))
    tr_weights = np.ones_like(tr_indices)  # hard one-hot: every active bin gets weight 1.0
    tr_fields = np.tile(np.arange(0, num_fields), (tr_indices.shape[0], 1))
    tr_offsets = tr_fields.copy()
    te_indices = discretizer.transform(te_feats)
    te_indices += np.tile(index_offsets, (te_indices.shape[0], 1))
    te_weights = np.ones_like(te_indices)
    te_fields = np.tile(np.arange(0, num_fields), (te_indices.shape[0], 1))
    te_offsets = te_fields.copy()
    train_ds = TensorDataset(
        torch.tensor(tr_indices, dtype=torch.int64),
        torch.tensor(tr_weights, dtype=torch.float32),
        torch.tensor(tr_offsets, dtype=torch.int64),
        torch.tensor(tr_fields, dtype=torch.int64),
        torch.tensor(tr_target, dtype=torch.float32))
    test_ds = TensorDataset(
        torch.tensor(te_indices, dtype=torch.int64),
        torch.tensor(te_weights, dtype=torch.float32),
        torch.tensor(te_offsets, dtype=torch.int64),
        torch.tensor(te_fields, dtype=torch.int64),
        torch.tensor(te_target, dtype=torch.float32))
    trainer = FMTrainer(embedding_dim, step_size, batch_size, num_epochs, callback)
    return trainer.train(num_fields, num_embeddings, train_ds, test_ds, torch.nn.MSELoss(), device)
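To make the index arithmetic concrete: each field's ordinal bin ids are shifted by field_index * num_bins, so every field owns a disjoint slice of the embedding table, and all weights are a constant 1.0 (a hard one-hot, in contrast to the fractional spline weights above). A tiny self-contained illustration with made-up values:

import numpy as np
from sklearn.preprocessing import KBinsDiscretizer

toy = np.array([[0.1, 10.0], [0.4, 20.0], [0.9, 30.0]])  # 3 samples, 2 fields
disc = KBinsDiscretizer(n_bins=2, encode='ordinal', strategy='uniform')
bins = disc.fit_transform(toy)           # per-field bin ids in {0, 1}
offsets = np.arange(toy.shape[1]) * 2    # field 0 -> rows 0-1, field 1 -> rows 2-3
print(bins + offsets)                    # global embedding indices, disjoint per field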
def test_bins_objective(trial: optuna.Trial):
    embedding_dim = trial.suggest_int('embedding_dim', 1, 10)
    step_size = trial.suggest_float('step_size', 1e-2, 0.5, log=True)
    batch_size = trial.suggest_int('batch_size', 32, 256)
    num_bins = trial.suggest_int('num_bins', 2, 100)
    bin_strategy = trial.suggest_categorical('bin_strategy', ['uniform', 'quantile'])
    num_epochs = trial.suggest_int('num_epochs', 5, 15)
    def callback(epoch: int, loss: float):
        trial.report(math.sqrt(loss), epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()
    return math.sqrt(train_bin_fm(embedding_dim, step_size, batch_size, num_bins, bin_strategy, num_epochs,
                                  callback=callback))
study_bins = optuna.create_study(study_name='bins',
                                 direction='minimize',
                                 sampler=optuna.samplers.TPESampler(seed=42))
study_bins.optimize(test_bins_objective, n_trials=100)
[I 2023-05-17 08:55:36,597] A new study created in memory with name: bins
[I 2023-05-17 08:56:42,311] Trial 0 finished with value: 1.4975637678984222 and parameters: {'embedding_dim': 4, 'step_size': 0.4123206532618726, 'batch_size': 196, 'num_bins': 61, 'bin_strategy': 'uniform', 'num_epochs': 5}. Best is trial 0 with value: 1.4975637678984222.
[I 2023-05-17 08:58:11,473] Trial 1 finished with value: 0.9477618933001615 and parameters: {'embedding_dim': 9, 'step_size': 0.10502105436744279, 'batch_size': 191, 'num_bins': 4, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:01:35,340] Trial 2 finished with value: 6.126791555787287 and parameters: {'embedding_dim': 2, 'step_size': 0.020492680115417352, 'batch_size': 100, 'num_bins': 53, 'bin_strategy': 'uniform', 'num_epochs': 11}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:04:24,192] Trial 3 finished with value: 3.7922082171917486 and parameters: {'embedding_dim': 2, 'step_size': 0.03135775732257745, 'batch_size': 114, 'num_bins': 47, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 1 with value: 0.9477618933001615.
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:209: FutureWarning: In version 1.3 onwards, subsample=2e5 will be used by default. Set subsample explicitly to silence this warning in the mean time. Set subsample=None to disable subsampling explicitly. (subsequent repeats of this warning are elided)
[I 2023-05-17 09:07:48,474] Trial 4 finished with value: 4.920383190956738 and parameters: {'embedding_dim': 6, 'step_size': 0.011992724522955167, 'batch_size': 168, 'num_bins': 18, 'bin_strategy': 'quantile', 'num_epochs': 15}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:12:29,875] Trial 5 finished with value: 1.5717226595459752 and parameters: {'embedding_dim': 9, 'step_size': 0.032925293631105246, 'batch_size': 53, 'num_bins': 69, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:16:04,541] Trial 6 finished with value: 1.3521047397480512 and parameters: {'embedding_dim': 1, 'step_size': 0.35067764992972184, 'batch_size': 90, 'num_bins': 67, 'bin_strategy': 'quantile', 'num_epochs': 11}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:19:13,939] Trial 7 finished with value: 1.2007141729915485 and parameters: {'embedding_dim': 2, 'step_size': 0.4439102767051397, 'batch_size': 206, 'num_bins': 95, 'bin_strategy': 'uniform', 'num_epochs': 15}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:19:47,894] Trial 8 pruned.
[I 2023-05-17 09:20:02,999] Trial 9 pruned.
[I 2023-05-17 09:21:00,876] Trial 10 finished with value: 1.3346845316959959 and parameters: {'embedding_dim': 10, 'step_size': 0.12983523609376665, 'batch_size': 250, 'num_bins': 3, 'bin_strategy': 'quantile', 'num_epochs': 5}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:22:35,439] Trial 11 finished with value: 1.5608530901900677 and parameters: {'embedding_dim': 8, 'step_size': 0.1376872218807011, 'batch_size': 216, 'num_bins': 100, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:24:19,889] Trial 12 finished with value: 1.4634795894105308 and parameters: {'embedding_dim': 7, 'step_size': 0.21367114396906817, 'batch_size': 202, 'num_bins': 95, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:24:32,672] Trial 13 pruned.
[I 2023-05-17 09:25:13,851] Trial 14 pruned.
[I 2023-05-17 09:27:03,976] Trial 15 finished with value: 1.2957459096253365 and parameters: {'embedding_dim': 10, 'step_size': 0.23255826170938765, 'batch_size': 127, 'num_bins': 85, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:27:16,943] Trial 16 pruned.
[I 2023-05-17 09:29:27,810] Trial 17 finished with value: 1.0541845747340057 and parameters: {'embedding_dim': 5, 'step_size': 0.23975523187099407, 'batch_size': 143, 'num_bins': 18, 'bin_strategy': 'uniform', 'num_epochs': 9}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:29:43,980] Trial 18 pruned.
[I 2023-05-17 09:31:20,282] Trial 19 finished with value: 1.0621075746970652 and parameters: {'embedding_dim': 7, 'step_size': 0.2549093254857601, 'batch_size': 135, 'num_bins': 20, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:31:34,878] Trial 20 pruned.
[I 2023-05-17 09:33:08,996] Trial 21 finished with value: 1.054130689788593 and parameters: {'embedding_dim': 7, 'step_size': 0.2657201022454841, 'batch_size': 130, 'num_bins': 18, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:35:09,400] Trial 22 finished with value: 0.978401527681663 and parameters: {'embedding_dim': 8, 'step_size': 0.3290394953042824, 'batch_size': 85, 'num_bins': 2, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:37:11,366] Trial 23 finished with value: 0.9773272457299675 and parameters: {'embedding_dim': 9, 'step_size': 0.31710795582758466, 'batch_size': 83, 'num_bins': 2, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:39:29,879] Trial 24 finished with value: 0.9562898527829065 and parameters: {'embedding_dim': 9, 'step_size': 0.3378995304928946, 'batch_size': 69, 'num_bins': 3, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:42:34,285] Trial 25 finished with value: 0.9436633645011917 and parameters: {'embedding_dim': 9, 'step_size': 0.19823779463510535, 'batch_size': 65, 'num_bins': 9, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 25 with value: 0.9436633645011917.
[I 2023-05-17 09:45:28,947] Trial 26 finished with value: 1.164404799550477 and parameters: {'embedding_dim': 9, 'step_size': 0.17813239995708516, 'batch_size': 63, 'num_bins': 10, 'bin_strategy': 'quantile', 'num_epochs': 7}. Best is trial 25 with value: 0.9436633645011917.
[I 2023-05-17 09:47:35,094] Trial 27 finished with value: 1.0489196169140858 and parameters: {'embedding_dim': 10, 'step_size': 0.17903454538711375, 'batch_size': 67, 'num_bins': 27, 'bin_strategy': 'uniform', 'num_epochs': 5}. Best is trial 25 with value: 0.9436633645011917.
[I 2023-05-17 09:50:13,791] Trial 28 finished with value: 0.9782663359005557 and parameters: {'embedding_dim': 9, 'step_size': 0.09682429400583151, 'batch_size': 108, 'num_bins': 11, 'bin_strategy': 'uniform', 'num_epochs': 9}. Best is trial 25 with value: 0.9436633645011917.
[I 2023-05-17 09:50:48,771] Trial 29 pruned.
[I 2023-05-17 09:51:11,400] Trial 30 pruned.
[I 2023-05-17 09:53:24,190] Trial 31 finished with value: 0.9574653419516895 and parameters: {'embedding_dim': 9, 'step_size': 0.3083443733551168, 'batch_size': 73, 'num_bins': 5, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 25 with value: 0.9436633645011917.
[I 2023-05-17 09:56:05,267] Trial 32 finished with value: 0.9674332191031986 and parameters: {'embedding_dim': 9, 'step_size': 0.35132816879017686, 'batch_size': 56, 'num_bins': 7, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 25 with value: 0.9436633645011917.
[I 2023-05-17 10:02:08,316] Trial 33 finished with value: 0.931104525782054 and parameters: {'embedding_dim': 8, 'step_size': 0.1881950631756381, 'batch_size': 32, 'num_bins': 11, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 33 with value: 0.931104525782054.
[I 2023-05-17 10:07:54,727] Trial 34 finished with value: 0.9282464792404165 and parameters: {'embedding_dim': 8, 'step_size': 0.1836127988688946, 'batch_size': 32, 'num_bins': 13, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:09:08,975] Trial 35 pruned.
[I 2023-05-17 10:13:43,282] Trial 36 finished with value: 0.9607950740805458 and parameters: {'embedding_dim': 8, 'step_size': 0.10437182801723151, 'batch_size': 48, 'num_bins': 13, 'bin_strategy': 'uniform', 'num_epochs': 9}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:14:26,193] Trial 37 pruned.
[I 2023-05-17 10:14:46,099] Trial 38 pruned.
[I 2023-05-17 10:15:29,836] Trial 39 pruned.
[I 2023-05-17 10:15:45,293] Trial 40 pruned.
[I 2023-05-17 10:18:47,195] Trial 41 finished with value: 0.9673396580923345 and parameters: {'embedding_dim': 9, 'step_size': 0.2645364621731419, 'batch_size': 58, 'num_bins': 14, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:22:33,567] Trial 42 finished with value: 0.9368678504818649 and parameters: {'embedding_dim': 9, 'step_size': 0.1569272763134277, 'batch_size': 44, 'num_bins': 7, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:26:02,538] Trial 43 finished with value: 0.9378460563292434 and parameters: {'embedding_dim': 8, 'step_size': 0.15746931534752506, 'batch_size': 49, 'num_bins': 8, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:26:34,040] Trial 44 pruned.
[I 2023-05-17 10:30:30,545] Trial 45 finished with value: 0.9574478177026972 and parameters: {'embedding_dim': 7, 'step_size': 0.2057268463296305, 'batch_size': 51, 'num_bins': 21, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:31:14,626] Trial 46 pruned.
[I 2023-05-17 10:35:49,407] Trial 47 finished with value: 0.947863769452646 and parameters: {'embedding_dim': 6, 'step_size': 0.12027419500221935, 'batch_size': 45, 'num_bins': 8, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:36:15,496] Trial 48 pruned.
[I 2023-05-17 10:38:23,497] Trial 49 finished with value: 0.947492812136962 and parameters: {'embedding_dim': 10, 'step_size': 0.14599531659124054, 'batch_size': 95, 'num_bins': 7, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:38:42,999] Trial 50 pruned.
[I 2023-05-17 10:40:50,881] Trial 51 finished with value: 0.9435857655186919 and parameters: {'embedding_dim': 10, 'step_size': 0.14004584581230414, 'batch_size': 94, 'num_bins': 7, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:44:33,910] Trial 52 finished with value: 0.9627206896151519 and parameters: {'embedding_dim': 10, 'step_size': 0.16710319214580308, 'batch_size': 55, 'num_bins': 15, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:47:05,892] Trial 53 finished with value: 0.9537351875104852 and parameters: {'embedding_dim': 9, 'step_size': 0.1328195451191491, 'batch_size': 79, 'num_bins': 10, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:48:16,979] Trial 54 pruned.
[I 2023-05-17 10:50:21,839] Trial 55 finished with value: 0.9486370737672429 and parameters: {'embedding_dim': 10, 'step_size': 0.1858775884761615, 'batch_size': 61, 'num_bins': 6, 'bin_strategy': 'uniform', 'num_epochs': 5}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:51:32,428] Trial 56 pruned.
[I 2023-05-17 10:51:52,706] Trial 57 pruned.
[I 2023-05-17 10:52:12,163] Trial 58 pruned.
[I 2023-05-17 10:52:37,200] Trial 59 pruned.
[I 2023-05-17 10:55:27,478] Trial 60 pruned.
[I 2023-05-17 10:57:34,576] Trial 61 finished with value: 0.9449334448633963 and parameters: {'embedding_dim': 10, 'step_size': 0.1366467782223684, 'batch_size': 96, 'num_bins': 7, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:01:50,640] Trial 62 finished with value: 0.9449154043429462 and parameters: {'embedding_dim': 10, 'step_size': 0.13463093485975017, 'batch_size': 38, 'num_bins': 5, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:05:24,433] Trial 63 finished with value: 0.942854436157313 and parameters: {'embedding_dim': 10, 'step_size': 0.16645696743907204, 'batch_size': 39, 'num_bins': 16, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:08:31,667] Trial 64 finished with value: 0.9660300247964014 and parameters: {'embedding_dim': 9, 'step_size': 0.24172761448782462, 'batch_size': 46, 'num_bins': 15, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:08:56,318] Trial 65 pruned.
[I 2023-05-17 11:09:27,483] Trial 66 pruned.
[I 2023-05-17 11:13:01,378] Trial 67 finished with value: 0.9467792009293124 and parameters: {'embedding_dim': 9, 'step_size': 0.10414553695447791, 'batch_size': 39, 'num_bins': 10, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:13:16,155] Trial 68 pruned.
[I 2023-05-17 11:13:43,450] Trial 69 pruned.
[I 2023-05-17 11:16:06,459] Trial 70 finished with value: 0.9486039607919019 and parameters: {'embedding_dim': 7, 'step_size': 0.2320824724639073, 'batch_size': 80, 'num_bins': 4, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:20:32,652] Trial 71 finished with value: 0.9458528975985477 and parameters: {'embedding_dim': 10, 'step_size': 0.12468574895708436, 'batch_size': 36, 'num_bins': 5, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:25:25,525] Trial 72 finished with value: 0.9340419252183723 and parameters: {'embedding_dim': 10, 'step_size': 0.1505868795227756, 'batch_size': 32, 'num_bins': 12, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:28:32,280] Trial 73 finished with value: 0.9447509737234354 and parameters: {'embedding_dim': 10, 'step_size': 0.15178479715933624, 'batch_size': 46, 'num_bins': 13, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:33:43,249] Trial 74 finished with value: 0.9271481237738436 and parameters: {'embedding_dim': 9, 'step_size': 0.18056347741768636, 'batch_size': 35, 'num_bins': 11, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 74 with value: 0.9271481237738436.
[I 2023-05-17 11:39:04,540] Trial 75 finished with value: 0.9308097221011861 and parameters: {'embedding_dim': 9, 'step_size': 0.18306349700515176, 'batch_size': 33, 'num_bins': 12, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 74 with value: 0.9271481237738436.
[I 2023-05-17 11:39:45,545] Trial 76 pruned.
[I 2023-05-17 11:40:36,417] Trial 77 pruned.
[I 2023-05-17 11:41:14,545] Trial 78 pruned.
[I 2023-05-17 11:41:45,751] Trial 79 pruned.
[I 2023-05-17 11:41:57,978] Trial 80 pruned.
[I 2023-05-17 11:45:04,041] Trial 81 pruned.
[I 2023-05-17 11:50:43,194] Trial 82 finished with value: 0.9266576039490881 and parameters: {'embedding_dim': 9, 'step_size': 0.1522109531308001, 'batch_size': 32, 'num_bins': 13, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 82 with value: 0.9266576039490881.
[I 2023-05-17 11:56:33,805] Trial 83 finished with value: 0.9245113241844185 and parameters: {'embedding_dim': 9, 'step_size': 0.15387178841176216, 'batch_size': 32, 'num_bins': 13, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 83 with value: 0.9245113241844185.
[I 2023-05-17 11:58:45,128] Trial 84 pruned.
[I 2023-05-17 11:59:14,857] Trial 85 pruned.
[I 2023-05-17 11:59:42,059] Trial 86 pruned.
[I 2023-05-17 12:05:17,455] Trial 87 finished with value: 0.9391445358927347 and parameters: {'embedding_dim': 7, 'step_size': 0.1248214132622492, 'batch_size': 32, 'num_bins': 9, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 83 with value: 0.9245113241844185.
[I 2023-05-17 12:05:50,993] Trial 88 pruned.
[I 2023-05-17 12:06:22,748] Trial 89 pruned.
[I 2023-05-17 12:11:29,341] Trial 90 finished with value: 0.9454279427145241 and parameters: {'embedding_dim': 8, 'step_size': 0.2141758242978807, 'batch_size': 39, 'num_bins': 4, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 83 with value: 0.9245113241844185.
[I 2023-05-17 12:17:19,461] Trial 91 finished with value: 0.9352401835954536 and parameters: {'embedding_dim': 6, 'step_size': 0.12725599471418103, 'batch_size': 32, 'num_bins': 10, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 83 with value: 0.9245113241844185.
[I 2023-05-17 12:17:58,751] Trial 92 pruned.
[I 2023-05-17 12:22:01,432] Trial 93 finished with value: 0.931574021363825 and parameters: {'embedding_dim': 9, 'step_size': 0.15452133804262197, 'batch_size': 47, 'num_bins': 10, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 83 with value: 0.9245113241844185.
[I 2023-05-17 12:23:34,558] Trial 94 pruned.
[I 2023-05-17 12:31:55,624] Trial 95 finished with value: 0.9200841999395952 and parameters: {'embedding_dim': 9, 'step_size': 0.1937905753882621, 'batch_size': 32, 'num_bins': 11, 'bin_strategy': 'uniform', 'num_epochs': 12}. Best is trial 95 with value: 0.9200841999395952.
[I 2023-05-17 12:32:38,081] Trial 96 pruned.
study_bins.best_params
{'embedding_dim': 9, 'step_size': 0.18730626111573867, 'batch_size': 36, 'num_bins': 12, 'bin_strategy': 'uniform', 'num_epochs': 12}
trial = study_bins.best_trial
print('Test loss: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))
Test loss: 0.9178594970619922
Best hyperparameters: {'embedding_dim': 9, 'step_size': 0.18730626111573867, 'batch_size': 36, 'num_bins': 12, 'bin_strategy': 'uniform', 'num_epochs': 12}
train_bin_fm(**study_bins.best_params)
spline_losses = []
for i in trange(20):
    loss = train_spline_fm(**study.best_params)
    spline_losses.append(math.sqrt(loss))
spline_losses
bin_losses = []
for i in trange(20):
    loss = train_bin_fm(**study_bins.best_params)
    bin_losses.append(math.sqrt(loss))
100%|██████████| 20/20 [2:32:40<00:00, 458.00s/it]
bin_losses
[0.925238985686155, 0.9163177902831852, 0.9152160603623787, 0.9213131631137549, 0.9204777295631849, 0.9132863523472907, 0.9249483055413078, 0.9170556904637397, 0.9165784330963357, 0.9197557627379956, 0.9206266194191419, 0.9143788694976471, 0.9135150737364409, 0.9184024841237616, 0.92070997297923, 0.9137055441795998, 0.9229001632891167, 0.9173062795121563, 0.9164100237132079, 0.9234843995759882]
np.mean(bin_losses), np.std(bin_losses), 100 * (np.std(bin_losses) / np.mean(bin_losses))
(0.918581385161081, 0.0036946303692452147, 0.402210455048284)
# 0.880369256648293 is presumably the mean of the 20 spline_losses above (that
# cell's output is not shown); a negative result means the spline FM achieves
# lower RMSE than the binned FM.
100 * (0.880369256648293 / np.mean(bin_losses) - 1)
-4.159906692000637
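The spline FM's RMSE is thus about 4.2% lower than the binned FM's mean RMSE, both in standardized target units. To read either number in years, the standardization can be undone with the rmse_in_years helper sketched earlier (0.880369... taken from the cell above):

print(rmse_in_years(float(np.mean(bin_losses))))  # binned FM test RMSE, in years
print(rmse_in_years(0.880369256648293))           # spline FM test RMSE, in years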