In [1]:
import sys

sys.path.insert(0, "../utils")
In [26]:
import sklearn.datasets as skds
from sklearn.preprocessing import QuantileTransformer, KBinsDiscretizer, OrdinalEncoder, LabelEncoder, StandardScaler
import numpy as np
import pandas as pd
from transformation import BSplineTransformer, spline_transform_dataset
from trainers import FFMTrainer, FMTrainer
import math
import optuna
import optuna.samplers
from typing import Callable, Optional
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset
from tqdm import trange
In [3]:
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)
cuda:0
In [4]:
torch.manual_seed(42)
np.random.seed(42)
In [5]:
raw_df = pd.read_csv("../data/Ye_millionsongdataset/Training_set_songs.csv",
                     names=['Year'] + [f'TA{i:02d}' for i in range(1, 13)] + [f'TC{i:02d}' for i in range(1, 79)],
                     dtype={0: int, **{i: float for i in range(1, 91)}},
                     na_values="?", skiprows=1)
In [6]:
raw_df.sample(6)
Out[6]:
Year TA01 TA02 TA03 TA04 TA05 TA06 TA07 TA08 TA09 ... TC69 TC70 TC71 TC72 TC73 TC74 TC75 TC76 TC77 TC78
332595 2004 51.19589 18.54077 12.55200 3.25000 -7.79542 -19.63461 0.27662 -1.03994 -1.57231 ... -16.43458 -37.05054 -100.93186 49.26529 -1.47467 -39.25996 -22.44389 -16.37939 -49.67664 3.87491
230573 1989 51.44573 53.42621 35.83483 -16.72867 -48.13185 -13.04248 -45.74081 -6.18791 16.60100 ... 9.94220 -71.82280 59.40250 45.14201 -2.72343 35.62292 37.99939 7.04862 21.08678 2.07779
364530 1987 44.85215 33.51052 27.44631 24.99783 -15.02508 12.29223 10.57365 7.06412 -1.29649 ... 5.05131 56.78609 136.98499 -30.38374 24.51034 62.77834 44.80304 30.34866 105.17991 2.58183
82857 2002 50.25653 59.83236 37.80210 -0.57762 -1.64064 -11.77884 -2.25574 -4.17007 4.05922 ... -4.12555 21.34620 29.72172 56.71419 2.61590 56.47152 -26.05716 -0.77059 29.40943 -0.02311
108108 1971 45.48775 -5.49790 9.56187 9.36977 -11.15726 -3.63341 11.00297 -6.36722 5.37455 ... -29.83405 228.02525 53.68190 -47.38609 14.62809 124.90797 -26.61476 5.08838 295.42035 19.74883
446568 2005 49.59492 35.40110 -8.11273 -13.40502 3.18931 -14.05923 3.04436 -1.43375 3.60354 ... -0.40689 -91.80271 -29.51019 -6.40710 6.99983 81.51023 -83.44656 2.81049 24.80947 -10.32877

6 rows × 91 columns

In [7]:
raw_df.shape
Out[7]:
(463715, 91)
In [8]:
raw_df.columns
Out[8]:
Index(['Year', 'TA01', 'TA02', 'TA03', 'TA04', 'TA05', 'TA06', 'TA07', 'TA08',
       'TA09', 'TA10', 'TA11', 'TA12', 'TC01', 'TC02', 'TC03', 'TC04', 'TC05',
       'TC06', 'TC07', 'TC08', 'TC09', 'TC10', 'TC11', 'TC12', 'TC13', 'TC14',
       'TC15', 'TC16', 'TC17', 'TC18', 'TC19', 'TC20', 'TC21', 'TC22', 'TC23',
       'TC24', 'TC25', 'TC26', 'TC27', 'TC28', 'TC29', 'TC30', 'TC31', 'TC32',
       'TC33', 'TC34', 'TC35', 'TC36', 'TC37', 'TC38', 'TC39', 'TC40', 'TC41',
       'TC42', 'TC43', 'TC44', 'TC45', 'TC46', 'TC47', 'TC48', 'TC49', 'TC50',
       'TC51', 'TC52', 'TC53', 'TC54', 'TC55', 'TC56', 'TC57', 'TC58', 'TC59',
       'TC60', 'TC61', 'TC62', 'TC63', 'TC64', 'TC65', 'TC66', 'TC67', 'TC68',
       'TC69', 'TC70', 'TC71', 'TC72', 'TC73', 'TC74', 'TC75', 'TC76', 'TC77',
       'TC78'],
      dtype='object')
In [9]:
train, test = train_test_split(raw_df, test_size=0.2, random_state=42)
In [10]:
tr_feats = train.drop("Year", axis=1)
tr_target = train["Year"].values
te_feats = test.drop("Year", axis=1)
te_target = test["Year"].values
In [11]:
target_scaler = StandardScaler()
tr_target = target_scaler.fit_transform(tr_target.reshape(-1, 1)).reshape(-1)
te_target = target_scaler.transform(te_target.reshape(-1, 1)).reshape(-1)
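Because the target is standardized, every loss reported below is in units of the target's standard deviation rather than in years. A small helper for translating a standardized RMSE back into years (a sketch, assuming the usual sklearn attribute: StandardScaler stores the fitted per-column std in scale_):

def rmse_in_years(rmse_std: float) -> float:
    # Undo the standardization: multiply by the std the scaler divided out.
    return float(rmse_std * target_scaler.scale_[0])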
In [12]:
quant_transform = QuantileTransformer(output_distribution='uniform',
                                      n_quantiles=10000,
                                      subsample=len(tr_feats),
                                      random_state=42)
X_train_qs = quant_transform.fit_transform(tr_feats)
X_test_qs = quant_transform.transform(te_feats)
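The uniform quantile transform maps each feature into [0, 1] (out-of-range test values are clipped to the training quantile range), which is what the B-spline transformer below expects. A quick sanity check (a minimal sketch):

print(X_train_qs.min(), X_train_qs.max())  # expected: 0.0 1.0
print(X_test_qs.min(), X_test_qs.max())    # expected: within [0, 1]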
In [13]:
def train_spline_fm(embedding_dim: int, step_size: float, batch_size: int, num_knots: int, num_epochs: int,
                    callback: Optional[Callable[[int, float], None]] = None):
    # Expand each quantile-transformed feature into cubic B-spline basis activations:
    # per feature, each sample yields the indices of the active basis functions and their weights.
    bs = BSplineTransformer(num_knots, 3)
    tr_indices, tr_weights, tr_offsets, tr_fields = spline_transform_dataset(X_train_qs, bs)
    te_indices, te_weights, te_offsets, te_fields = spline_transform_dataset(X_test_qs, bs)

    num_fields = X_train_qs.shape[1]
    # One embedding row per basis function; the indices are global across fields.
    num_embeddings = int(max(np.max(tr_indices), np.max(te_indices)) + 1)

    train_ds = TensorDataset(
        torch.tensor(tr_indices, dtype=torch.int64),
        torch.tensor(tr_weights, dtype=torch.float32),
        torch.tensor(tr_offsets, dtype=torch.int64),
        torch.tensor(tr_fields, dtype=torch.int64),
        torch.tensor(tr_target, dtype=torch.float32))

    test_ds = TensorDataset(
        torch.tensor(te_indices, dtype=torch.int64),
        torch.tensor(te_weights, dtype=torch.float32),
        torch.tensor(te_offsets, dtype=torch.int64),
        torch.tensor(te_fields, dtype=torch.int64),
        torch.tensor(te_target, dtype=torch.float32))

    trainer = FMTrainer(embedding_dim, step_size, batch_size, num_epochs, callback)
    return trainer.train(num_fields, num_embeddings, train_ds, test_ds, torch.nn.MSELoss(), device)
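Before handing this to Optuna, it can be smoke-tested directly (a sketch; the hyperparameter values are arbitrary, and it assumes FMTrainer.train returns the final test-set MSE, as the sqrt in the objective below implies):

mse = train_spline_fm(embedding_dim=4, step_size=0.1, batch_size=128, num_knots=16, num_epochs=1)
print(math.sqrt(mse))  # RMSE in standardized target units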
In [14]:
def train_spline_objective(trial: optuna.Trial):
    embedding_dim = trial.suggest_int('embedding_dim', 1, 10)
    step_size = trial.suggest_float('step_size', 1e-2, 0.5, log=True)
    batch_size = trial.suggest_int('batch_size', 32, 256)
    num_knots = trial.suggest_int('num_knots', 3, 48)
    num_epochs = trial.suggest_int('num_epochs', 5, 15)

    def callback(epoch: int, loss: float):
        trial.report(math.sqrt(loss), epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return math.sqrt(train_spline_fm(embedding_dim, step_size, batch_size, num_knots, num_epochs,
                                     callback=callback))
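The callback reports the per-epoch RMSE so Optuna can stop unpromising trials early; with no pruner given, create_study falls back to the default MedianPruner. To make the pruning policy explicit, or to tune its warm-up, it can be passed at study creation, e.g.:

study = optuna.create_study(study_name='splines',
                            direction='minimize',
                            sampler=optuna.samplers.TPESampler(seed=42),
                            pruner=optuna.pruners.MedianPruner(n_warmup_steps=2))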
In [15]:
study = optuna.create_study(study_name='splines',
                            direction='minimize',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(train_spline_objective, n_trials=100)
[I 2023-05-17 08:19:57,777] A new study created in memory with name: splines
[I 2023-05-17 08:23:37,582] Trial 0 finished with value: 1.2386136255730438 and parameters: {'embedding_dim': 4, 'step_size': 0.4123206532618726, 'batch_size': 196, 'num_knots': 30, 'num_epochs': 6}. Best is trial 0 with value: 1.2386136255730438.
[I 2023-05-17 08:29:49,023] Trial 1 finished with value: 9.578139736045964 and parameters: {'embedding_dim': 2, 'step_size': 0.012551115172973842, 'batch_size': 226, 'num_knots': 30, 'num_epochs': 12}. Best is trial 0 with value: 1.2386136255730438.
[I 2023-05-17 08:33:39,931] Trial 2 finished with value: 1.4629122193885566 and parameters: {'embedding_dim': 1, 'step_size': 0.44447541666908114, 'batch_size': 219, 'num_knots': 12, 'num_epochs': 7}. Best is trial 0 with value: 1.2386136255730438.
[I 2023-05-17 08:38:55,191] Trial 3 finished with value: 5.51720045029323 and parameters: {'embedding_dim': 2, 'step_size': 0.0328774741399112, 'batch_size': 150, 'num_knots': 22, 'num_epochs': 8}. Best is trial 0 with value: 1.2386136255730438.
[I 2023-05-17 08:47:13,068] Trial 4 finished with value: 2.7195761291430305 and parameters: {'embedding_dim': 7, 'step_size': 0.017258215396625, 'batch_size': 97, 'num_knots': 19, 'num_epochs': 10}. Best is trial 0 with value: 1.2386136255730438.
[I 2023-05-17 08:48:27,477] Trial 5 pruned. 
[I 2023-05-17 08:49:45,729] Trial 6 pruned. 
[I 2023-05-17 08:55:03,136] Trial 7 finished with value: 1.3228858834036774 and parameters: {'embedding_dim': 9, 'step_size': 0.032925293631105246, 'batch_size': 53, 'num_knots': 34, 'num_epochs': 9}. Best is trial 0 with value: 1.2386136255730438.
[W 2023-05-17 08:55:23,673] Trial 8 failed with parameters: {'embedding_dim': 2, 'step_size': 0.06938901412739397, 'batch_size': 39, 'num_knots': 44, 'num_epochs': 7} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_2644/2820662323.py", line 13, in train_spline_objective
    return math.sqrt(train_spline_fm(embedding_dim, step_size, batch_size, num_knots, num_epochs,
  File "/tmp/ipykernel_2644/897098666.py", line 4, in train_spline_fm
    tr_indices, tr_weights, tr_offsets, tr_fields = spline_transform_dataset(X_train_qs, bs)
  File "/home/default/ephemeral_drive/notebooks/../utils/transformation.py", line 57, in spline_transform_dataset
    idx[regular_mask, :], weights[regular_mask, :] = spline_transformer(col[regular_mask])
  File "/home/default/ephemeral_drive/notebooks/../utils/transformation.py", line 24, in __call__
    arr = np.array([be(x) for be in self.basis])
KeyboardInterrupt
[W 2023-05-17 08:55:23,676] Trial 8 failed with value None.
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Input In [15], in <cell line: 4>()
      1 study = optuna.create_study(study_name='splines',
      2                             direction='minimize',
      3                             sampler=optuna.samplers.TPESampler(seed=42))
----> 4 study.optimize(train_spline_objective, n_trials=100)

File /usr/local/lib/python3.9/site-packages/optuna/study/study.py:425, in Study.optimize(self, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
    321 def optimize(
    322     self,
    323     func: ObjectiveFuncType,
   (...)
    330     show_progress_bar: bool = False,
    331 ) -> None:
    332     """Optimize an objective function.
    333 
    334     Optimization is done by choosing a suitable set of hyperparameter values from a given
   (...)
    422             If nested invocation of this method occurs.
    423     """
--> 425     _optimize(
    426         study=self,
    427         func=func,
    428         n_trials=n_trials,
    429         timeout=timeout,
    430         n_jobs=n_jobs,
    431         catch=tuple(catch) if isinstance(catch, Iterable) else (catch,),
    432         callbacks=callbacks,
    433         gc_after_trial=gc_after_trial,
    434         show_progress_bar=show_progress_bar,
    435     )

File /usr/local/lib/python3.9/site-packages/optuna/study/_optimize.py:66, in _optimize(study, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
     64 try:
     65     if n_jobs == 1:
---> 66         _optimize_sequential(
     67             study,
     68             func,
     69             n_trials,
     70             timeout,
     71             catch,
     72             callbacks,
     73             gc_after_trial,
     74             reseed_sampler_rng=False,
     75             time_start=None,
     76             progress_bar=progress_bar,
     77         )
     78     else:
     79         if n_jobs == -1:

File /usr/local/lib/python3.9/site-packages/optuna/study/_optimize.py:163, in _optimize_sequential(study, func, n_trials, timeout, catch, callbacks, gc_after_trial, reseed_sampler_rng, time_start, progress_bar)
    160         break
    162 try:
--> 163     frozen_trial = _run_trial(study, func, catch)
    164 finally:
    165     # The following line mitigates memory problems that can be occurred in some
    166     # environments (e.g., services that use computing containers such as GitHub Actions).
    167     # Please refer to the following PR for further details:
    168     # https://github.com/optuna/optuna/pull/325.
    169     if gc_after_trial:

File /usr/local/lib/python3.9/site-packages/optuna/study/_optimize.py:251, in _run_trial(study, func, catch)
    244         assert False, "Should not reach."
    246 if (
    247     frozen_trial.state == TrialState.FAIL
    248     and func_err is not None
    249     and not isinstance(func_err, catch)
    250 ):
--> 251     raise func_err
    252 return frozen_trial

File /usr/local/lib/python3.9/site-packages/optuna/study/_optimize.py:200, in _run_trial(study, func, catch)
    198 with get_heartbeat_thread(trial._trial_id, study._storage):
    199     try:
--> 200         value_or_values = func(trial)
    201     except exceptions.TrialPruned as e:
    202         # TODO(mamu): Handle multi-objective cases.
    203         state = TrialState.PRUNED

Input In [14], in train_spline_objective(trial)
     10     if trial.should_prune():
     11         raise optuna.TrialPruned()
---> 13 return math.sqrt(train_spline_fm(embedding_dim, step_size, batch_size, num_knots, num_epochs,
     14                                  callback=callback))

Input In [13], in train_spline_fm(embedding_dim, step_size, batch_size, num_knots, num_epochs, callback)
      1 def train_spline_fm(embedding_dim: int, step_size: float, batch_size: int, num_knots: int, num_epochs: int,
      2                      callback: Callable[[int, float], None]=None):
      3     bs = BSplineTransformer(num_knots, 3)
----> 4     tr_indices, tr_weights, tr_offsets, tr_fields = spline_transform_dataset(X_train_qs, bs)
      5     te_indices, te_weights, te_offsets, te_fields = spline_transform_dataset(X_test_qs, bs)
      7     num_fields = X_train_qs.shape[1]

File /home/default/ephemeral_drive/notebooks/../utils/transformation.py:57, in spline_transform_dataset(numerical_zero_one, spline_transformer, special_values)
     54 idx = np.zeros((col.shape[0], spline_transformer.basis_support()), dtype=np.int32)
     55 weights = np.zeros((col.shape[0], spline_transformer.basis_support()), dtype=np.float32)
---> 57 idx[regular_mask, :], weights[regular_mask, :] = spline_transformer(col[regular_mask])
     58 special_offset = spline_transformer.basis_size()
     59 for sval in col_special_values:

File /home/default/ephemeral_drive/notebooks/../utils/transformation.py:24, in BSplineTransformer.__call__(self, x)
     23 def __call__(self, x):
---> 24     arr = np.array([be(x) for be in self.basis])
     25     start_idx = np.argmax(~np.isnan(arr), axis=0)
     26     full_idx = np.linspace(start_idx, start_idx + self.degree, self.degree + 1, dtype=np.int32)

KeyboardInterrupt: 
In [ ]:
trial = study.best_trial

print('Test RMSE: {}'.format(trial.value))
print('Best hyperparameters: {}'.format(trial.params))
In [ ]:
study.best_params
In [ ]:
train_spline_fm(**study.best_params)
In [16]:
def train_bin_fm(embedding_dim: int, step_size: float, batch_size: int,
                 num_bins: int, bin_strategy: str, num_epochs: int,
                 callback: Optional[Callable[[int, float], None]] = None):
    num_fields = tr_feats.shape[1]
    # Each field gets its own contiguous block of num_bins embedding rows; offsetting
    # the per-field bin ids by field_index * num_bins makes the indices globally unique.
    num_embeddings = num_fields * num_bins
    index_offsets = np.arange(0, num_fields) * num_bins

    discretizer = KBinsDiscretizer(num_bins, encode='ordinal', strategy=bin_strategy, random_state=42)
    discretizer.fit(tr_feats)

    tr_indices = discretizer.transform(tr_feats)
    tr_indices += np.tile(index_offsets, (tr_indices.shape[0], 1))
    # Binning is a hard assignment, so every active index carries weight 1.
    tr_weights = np.ones_like(tr_indices)
    tr_fields = np.tile(np.arange(0, num_fields), (tr_indices.shape[0], 1))
    tr_offsets = tr_fields.copy()

    te_indices = discretizer.transform(te_feats)
    te_indices += np.tile(index_offsets, (te_indices.shape[0], 1))
    te_weights = np.ones_like(te_indices)
    te_fields = np.tile(np.arange(0, num_fields), (te_indices.shape[0], 1))
    te_offsets = te_fields.copy()

    train_ds = TensorDataset(
        torch.tensor(tr_indices, dtype=torch.int64),
        torch.tensor(tr_weights, dtype=torch.float32),
        torch.tensor(tr_offsets, dtype=torch.int64),
        torch.tensor(tr_fields, dtype=torch.int64),
        torch.tensor(tr_target, dtype=torch.float32))

    test_ds = TensorDataset(
        torch.tensor(te_indices, dtype=torch.int64),
        torch.tensor(te_weights, dtype=torch.float32),
        torch.tensor(te_offsets, dtype=torch.int64),
        torch.tensor(te_fields, dtype=torch.int64),
        torch.tensor(te_target, dtype=torch.float32))

    trainer = FMTrainer(embedding_dim, step_size, batch_size, num_epochs, callback)
    return trainer.train(num_fields, num_embeddings, train_ds, test_ds, torch.nn.MSELoss(), device)
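To make the index-offset trick concrete, a toy example with 3 fields of 4 bins each (hypothetical numbers):

offsets = np.arange(0, 3) * 4     # [0, 4, 8]
raw_bins = np.array([[2, 0, 3]])  # per-field bin ids for one sample
print(raw_bins + offsets)         # [[ 2  4 11]]: one unique embedding row per (field, bin) pair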
In [17]:
def test_bins_objective(trial: optuna.Trial):
    embedding_dim = trial.suggest_int('embedding_dim', 1, 10)
    step_size = trial.suggest_float('step_size', 1e-2, 0.5, log=True)
    batch_size = trial.suggest_int('batch_size', 32, 256)
    num_bins = trial.suggest_int('num_bins', 2, 100)
    bin_strategy = trial.suggest_categorical('bin_strategy', ['uniform', 'quantile'])
    num_epochs = trial.suggest_int('num_epochs', 5, 15)

    def callback(epoch: int, loss: float):
        trial.report(math.sqrt(loss), epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return math.sqrt(train_bin_fm(embedding_dim, step_size, batch_size, num_bins, bin_strategy, num_epochs,
                                  callback=callback))
In [ ]:
study_bins = optuna.create_study(study_name='bins',
                                 direction='minimize',
                                 sampler=optuna.samplers.TPESampler(seed=42))
study_bins.optimize(test_bins_objective, n_trials=100)
[I 2023-05-17 08:55:36,597] A new study created in memory with name: bins
[I 2023-05-17 08:56:42,311] Trial 0 finished with value: 1.4975637678984222 and parameters: {'embedding_dim': 4, 'step_size': 0.4123206532618726, 'batch_size': 196, 'num_bins': 61, 'bin_strategy': 'uniform', 'num_epochs': 5}. Best is trial 0 with value: 1.4975637678984222.
[I 2023-05-17 08:58:11,473] Trial 1 finished with value: 0.9477618933001615 and parameters: {'embedding_dim': 9, 'step_size': 0.10502105436744279, 'batch_size': 191, 'num_bins': 4, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:01:35,340] Trial 2 finished with value: 6.126791555787287 and parameters: {'embedding_dim': 2, 'step_size': 0.020492680115417352, 'batch_size': 100, 'num_bins': 53, 'bin_strategy': 'uniform', 'num_epochs': 11}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:04:24,192] Trial 3 finished with value: 3.7922082171917486 and parameters: {'embedding_dim': 2, 'step_size': 0.03135775732257745, 'batch_size': 114, 'num_bins': 47, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 1 with value: 0.9477618933001615.
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:209: FutureWarning: In version 1.3 onwards, subsample=2e5 will be used by default. Set subsample explicitly to silence this warning in the mean time. Set subsample=None to disable subsampling explicitly.
  warnings.warn(
[I 2023-05-17 09:07:48,474] Trial 4 finished with value: 4.920383190956738 and parameters: {'embedding_dim': 6, 'step_size': 0.011992724522955167, 'batch_size': 168, 'num_bins': 18, 'bin_strategy': 'quantile', 'num_epochs': 15}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:12:29,875] Trial 5 finished with value: 1.5717226595459752 and parameters: {'embedding_dim': 9, 'step_size': 0.032925293631105246, 'batch_size': 53, 'num_bins': 69, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:16:04,541] Trial 6 finished with value: 1.3521047397480512 and parameters: {'embedding_dim': 1, 'step_size': 0.35067764992972184, 'batch_size': 90, 'num_bins': 67, 'bin_strategy': 'quantile', 'num_epochs': 11}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:19:13,939] Trial 7 finished with value: 1.2007141729915485 and parameters: {'embedding_dim': 2, 'step_size': 0.4439102767051397, 'batch_size': 206, 'num_bins': 95, 'bin_strategy': 'uniform', 'num_epochs': 15}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:19:47,894] Trial 8 pruned. 
[I 2023-05-17 09:20:02,999] Trial 9 pruned. 
[I 2023-05-17 09:21:00,876] Trial 10 finished with value: 1.3346845316959959 and parameters: {'embedding_dim': 10, 'step_size': 0.12983523609376665, 'batch_size': 250, 'num_bins': 3, 'bin_strategy': 'quantile', 'num_epochs': 5}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:22:35,439] Trial 11 finished with value: 1.5608530901900677 and parameters: {'embedding_dim': 8, 'step_size': 0.1376872218807011, 'batch_size': 216, 'num_bins': 100, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:24:19,889] Trial 12 finished with value: 1.4634795894105308 and parameters: {'embedding_dim': 7, 'step_size': 0.21367114396906817, 'batch_size': 202, 'num_bins': 95, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:24:32,672] Trial 13 pruned. 
[I 2023-05-17 09:25:13,851] Trial 14 pruned. 
[I 2023-05-17 09:27:03,976] Trial 15 finished with value: 1.2957459096253365 and parameters: {'embedding_dim': 10, 'step_size': 0.23255826170938765, 'batch_size': 127, 'num_bins': 85, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:27:16,943] Trial 16 pruned. 
[I 2023-05-17 09:29:27,810] Trial 17 finished with value: 1.0541845747340057 and parameters: {'embedding_dim': 5, 'step_size': 0.23975523187099407, 'batch_size': 143, 'num_bins': 18, 'bin_strategy': 'uniform', 'num_epochs': 9}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:29:43,980] Trial 18 pruned. 
[I 2023-05-17 09:31:20,282] Trial 19 finished with value: 1.0621075746970652 and parameters: {'embedding_dim': 7, 'step_size': 0.2549093254857601, 'batch_size': 135, 'num_bins': 20, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:31:34,878] Trial 20 pruned. 
[I 2023-05-17 09:33:08,996] Trial 21 finished with value: 1.054130689788593 and parameters: {'embedding_dim': 7, 'step_size': 0.2657201022454841, 'batch_size': 130, 'num_bins': 18, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:35:09,400] Trial 22 finished with value: 0.978401527681663 and parameters: {'embedding_dim': 8, 'step_size': 0.3290394953042824, 'batch_size': 85, 'num_bins': 2, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:37:11,366] Trial 23 finished with value: 0.9773272457299675 and parameters: {'embedding_dim': 9, 'step_size': 0.31710795582758466, 'batch_size': 83, 'num_bins': 2, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:39:29,879] Trial 24 finished with value: 0.9562898527829065 and parameters: {'embedding_dim': 9, 'step_size': 0.3378995304928946, 'batch_size': 69, 'num_bins': 3, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 1 with value: 0.9477618933001615.
[I 2023-05-17 09:42:34,285] Trial 25 finished with value: 0.9436633645011917 and parameters: {'embedding_dim': 9, 'step_size': 0.19823779463510535, 'batch_size': 65, 'num_bins': 9, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 25 with value: 0.9436633645011917.
[I 2023-05-17 09:45:28,947] Trial 26 finished with value: 1.164404799550477 and parameters: {'embedding_dim': 9, 'step_size': 0.17813239995708516, 'batch_size': 63, 'num_bins': 10, 'bin_strategy': 'quantile', 'num_epochs': 7}. Best is trial 25 with value: 0.9436633645011917.
[I 2023-05-17 09:47:35,094] Trial 27 finished with value: 1.0489196169140858 and parameters: {'embedding_dim': 10, 'step_size': 0.17903454538711375, 'batch_size': 67, 'num_bins': 27, 'bin_strategy': 'uniform', 'num_epochs': 5}. Best is trial 25 with value: 0.9436633645011917.
[I 2023-05-17 09:50:13,791] Trial 28 finished with value: 0.9782663359005557 and parameters: {'embedding_dim': 9, 'step_size': 0.09682429400583151, 'batch_size': 108, 'num_bins': 11, 'bin_strategy': 'uniform', 'num_epochs': 9}. Best is trial 25 with value: 0.9436633645011917.
[I 2023-05-17 09:50:48,771] Trial 29 pruned. 
[I 2023-05-17 09:51:11,400] Trial 30 pruned. 
[I 2023-05-17 09:53:24,190] Trial 31 finished with value: 0.9574653419516895 and parameters: {'embedding_dim': 9, 'step_size': 0.3083443733551168, 'batch_size': 73, 'num_bins': 5, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 25 with value: 0.9436633645011917.
[I 2023-05-17 09:56:05,267] Trial 32 finished with value: 0.9674332191031986 and parameters: {'embedding_dim': 9, 'step_size': 0.35132816879017686, 'batch_size': 56, 'num_bins': 7, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 25 with value: 0.9436633645011917.
[I 2023-05-17 10:02:08,316] Trial 33 finished with value: 0.931104525782054 and parameters: {'embedding_dim': 8, 'step_size': 0.1881950631756381, 'batch_size': 32, 'num_bins': 11, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 33 with value: 0.931104525782054.
[I 2023-05-17 10:07:54,727] Trial 34 finished with value: 0.9282464792404165 and parameters: {'embedding_dim': 8, 'step_size': 0.1836127988688946, 'batch_size': 32, 'num_bins': 13, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:09:08,975] Trial 35 pruned. 
[I 2023-05-17 10:13:43,282] Trial 36 finished with value: 0.9607950740805458 and parameters: {'embedding_dim': 8, 'step_size': 0.10437182801723151, 'batch_size': 48, 'num_bins': 13, 'bin_strategy': 'uniform', 'num_epochs': 9}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:14:26,193] Trial 37 pruned. 
[I 2023-05-17 10:14:46,099] Trial 38 pruned. 
[I 2023-05-17 10:15:29,836] Trial 39 pruned. 
[I 2023-05-17 10:15:45,293] Trial 40 pruned. 
[I 2023-05-17 10:18:47,195] Trial 41 finished with value: 0.9673396580923345 and parameters: {'embedding_dim': 9, 'step_size': 0.2645364621731419, 'batch_size': 58, 'num_bins': 14, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:22:33,567] Trial 42 finished with value: 0.9368678504818649 and parameters: {'embedding_dim': 9, 'step_size': 0.1569272763134277, 'batch_size': 44, 'num_bins': 7, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:26:02,538] Trial 43 finished with value: 0.9378460563292434 and parameters: {'embedding_dim': 8, 'step_size': 0.15746931534752506, 'batch_size': 49, 'num_bins': 8, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:26:34,040] Trial 44 pruned. 
[I 2023-05-17 10:30:30,545] Trial 45 finished with value: 0.9574478177026972 and parameters: {'embedding_dim': 7, 'step_size': 0.2057268463296305, 'batch_size': 51, 'num_bins': 21, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:31:14,626] Trial 46 pruned. 
[I 2023-05-17 10:35:49,407] Trial 47 finished with value: 0.947863769452646 and parameters: {'embedding_dim': 6, 'step_size': 0.12027419500221935, 'batch_size': 45, 'num_bins': 8, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:36:15,496] Trial 48 pruned. 
[I 2023-05-17 10:38:23,497] Trial 49 finished with value: 0.947492812136962 and parameters: {'embedding_dim': 10, 'step_size': 0.14599531659124054, 'batch_size': 95, 'num_bins': 7, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:38:42,999] Trial 50 pruned. 
[I 2023-05-17 10:40:50,881] Trial 51 finished with value: 0.9435857655186919 and parameters: {'embedding_dim': 10, 'step_size': 0.14004584581230414, 'batch_size': 94, 'num_bins': 7, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:44:33,910] Trial 52 finished with value: 0.9627206896151519 and parameters: {'embedding_dim': 10, 'step_size': 0.16710319214580308, 'batch_size': 55, 'num_bins': 15, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:47:05,892] Trial 53 finished with value: 0.9537351875104852 and parameters: {'embedding_dim': 9, 'step_size': 0.1328195451191491, 'batch_size': 79, 'num_bins': 10, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:48:16,979] Trial 54 pruned. 
[I 2023-05-17 10:50:21,839] Trial 55 finished with value: 0.9486370737672429 and parameters: {'embedding_dim': 10, 'step_size': 0.1858775884761615, 'batch_size': 61, 'num_bins': 6, 'bin_strategy': 'uniform', 'num_epochs': 5}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 10:51:32,428] Trial 56 pruned. 
[I 2023-05-17 10:51:52,706] Trial 57 pruned. 
[I 2023-05-17 10:52:12,163] Trial 58 pruned. 
[I 2023-05-17 10:52:37,200] Trial 59 pruned. 
[I 2023-05-17 10:55:27,478] Trial 60 pruned. 
[I 2023-05-17 10:57:34,576] Trial 61 finished with value: 0.9449334448633963 and parameters: {'embedding_dim': 10, 'step_size': 0.1366467782223684, 'batch_size': 96, 'num_bins': 7, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:01:50,640] Trial 62 finished with value: 0.9449154043429462 and parameters: {'embedding_dim': 10, 'step_size': 0.13463093485975017, 'batch_size': 38, 'num_bins': 5, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:05:24,433] Trial 63 finished with value: 0.942854436157313 and parameters: {'embedding_dim': 10, 'step_size': 0.16645696743907204, 'batch_size': 39, 'num_bins': 16, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:08:31,667] Trial 64 finished with value: 0.9660300247964014 and parameters: {'embedding_dim': 9, 'step_size': 0.24172761448782462, 'batch_size': 46, 'num_bins': 15, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:08:56,318] Trial 65 pruned. 
[I 2023-05-17 11:09:27,483] Trial 66 pruned. 
[I 2023-05-17 11:13:01,378] Trial 67 finished with value: 0.9467792009293124 and parameters: {'embedding_dim': 9, 'step_size': 0.10414553695447791, 'batch_size': 39, 'num_bins': 10, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:13:16,155] Trial 68 pruned. 
[I 2023-05-17 11:13:43,450] Trial 69 pruned. 
[I 2023-05-17 11:16:06,459] Trial 70 finished with value: 0.9486039607919019 and parameters: {'embedding_dim': 7, 'step_size': 0.2320824724639073, 'batch_size': 80, 'num_bins': 4, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:20:32,652] Trial 71 finished with value: 0.9458528975985477 and parameters: {'embedding_dim': 10, 'step_size': 0.12468574895708436, 'batch_size': 36, 'num_bins': 5, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:25:25,525] Trial 72 finished with value: 0.9340419252183723 and parameters: {'embedding_dim': 10, 'step_size': 0.1505868795227756, 'batch_size': 32, 'num_bins': 12, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:28:32,280] Trial 73 finished with value: 0.9447509737234354 and parameters: {'embedding_dim': 10, 'step_size': 0.15178479715933624, 'batch_size': 46, 'num_bins': 13, 'bin_strategy': 'uniform', 'num_epochs': 6}. Best is trial 34 with value: 0.9282464792404165.
[I 2023-05-17 11:33:43,249] Trial 74 finished with value: 0.9271481237738436 and parameters: {'embedding_dim': 9, 'step_size': 0.18056347741768636, 'batch_size': 35, 'num_bins': 11, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 74 with value: 0.9271481237738436.
[I 2023-05-17 11:39:04,540] Trial 75 finished with value: 0.9308097221011861 and parameters: {'embedding_dim': 9, 'step_size': 0.18306349700515176, 'batch_size': 33, 'num_bins': 12, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 74 with value: 0.9271481237738436.
[I 2023-05-17 11:39:45,545] Trial 76 pruned. 
[I 2023-05-17 11:40:36,417] Trial 77 pruned. 
[I 2023-05-17 11:41:14,545] Trial 78 pruned. 
[I 2023-05-17 11:41:45,751] Trial 79 pruned. 
[I 2023-05-17 11:41:57,978] Trial 80 pruned. 
[I 2023-05-17 11:45:04,041] Trial 81 pruned. 
[I 2023-05-17 11:50:43,194] Trial 82 finished with value: 0.9266576039490881 and parameters: {'embedding_dim': 9, 'step_size': 0.1522109531308001, 'batch_size': 32, 'num_bins': 13, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 82 with value: 0.9266576039490881.
[I 2023-05-17 11:56:33,805] Trial 83 finished with value: 0.9245113241844185 and parameters: {'embedding_dim': 9, 'step_size': 0.15387178841176216, 'batch_size': 32, 'num_bins': 13, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 83 with value: 0.9245113241844185.
[I 2023-05-17 11:58:45,128] Trial 84 pruned. 
[I 2023-05-17 11:59:14,857] Trial 85 pruned. 
[I 2023-05-17 11:59:42,059] Trial 86 pruned. 
[I 2023-05-17 12:05:17,455] Trial 87 finished with value: 0.9391445358927347 and parameters: {'embedding_dim': 7, 'step_size': 0.1248214132622492, 'batch_size': 32, 'num_bins': 9, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 83 with value: 0.9245113241844185.
[I 2023-05-17 12:05:50,993] Trial 88 pruned. 
[I 2023-05-17 12:06:22,748] Trial 89 pruned. 
[I 2023-05-17 12:11:29,341] Trial 90 finished with value: 0.9454279427145241 and parameters: {'embedding_dim': 8, 'step_size': 0.2141758242978807, 'batch_size': 39, 'num_bins': 4, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 83 with value: 0.9245113241844185.
[I 2023-05-17 12:17:19,461] Trial 91 finished with value: 0.9352401835954536 and parameters: {'embedding_dim': 6, 'step_size': 0.12725599471418103, 'batch_size': 32, 'num_bins': 10, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 83 with value: 0.9245113241844185.
[I 2023-05-17 12:17:58,751] Trial 92 pruned. 
[I 2023-05-17 12:22:01,432] Trial 93 finished with value: 0.931574021363825 and parameters: {'embedding_dim': 9, 'step_size': 0.15452133804262197, 'batch_size': 47, 'num_bins': 10, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 83 with value: 0.9245113241844185.
[I 2023-05-17 12:23:34,558] Trial 94 pruned. 
[I 2023-05-17 12:31:55,624] Trial 95 finished with value: 0.9200841999395952 and parameters: {'embedding_dim': 9, 'step_size': 0.1937905753882621, 'batch_size': 32, 'num_bins': 11, 'bin_strategy': 'uniform', 'num_epochs': 12}. Best is trial 95 with value: 0.9200841999395952.
[I 2023-05-17 12:32:38,081] Trial 96 pruned. 
In [23]:
study_bins.best_params
Out[23]:
{'embedding_dim': 9,
 'step_size': 0.18730626111573867,
 'batch_size': 36,
 'num_bins': 12,
 'bin_strategy': 'uniform',
 'num_epochs': 12}
In [24]:
trial = study_bins.best_trial

print('Test RMSE: {}'.format(trial.value))
print('Best hyperparameters: {}'.format(trial.params))
Test RMSE: 0.9178594970619922
Best hyperparameters: {'embedding_dim': 9, 'step_size': 0.18730626111573867, 'batch_size': 36, 'num_bins': 12, 'bin_strategy': 'uniform', 'num_epochs': 12}
In [ ]:
train_bin_fm(**study_bins.best_params)
In [ ]:
spline_losses = []
for i in trange(20):
    loss = train_spline_fm(**study.best_params)
    spline_losses.append(math.sqrt(loss))
In [ ]:
spline_losses
In [27]:
bin_losses = []
for i in trange(20):
    loss = train_bin_fm(**study_bins.best_params)
    bin_losses.append(math.sqrt(loss))
100%|██████████| 20/20 [2:32:40<00:00, 458.00s/it]  
In [28]:
bin_losses
Out[28]:
[0.925238985686155,
 0.9163177902831852,
 0.9152160603623787,
 0.9213131631137549,
 0.9204777295631849,
 0.9132863523472907,
 0.9249483055413078,
 0.9170556904637397,
 0.9165784330963357,
 0.9197557627379956,
 0.9206266194191419,
 0.9143788694976471,
 0.9135150737364409,
 0.9184024841237616,
 0.92070997297923,
 0.9137055441795998,
 0.9229001632891167,
 0.9173062795121563,
 0.9164100237132079,
 0.9234843995759882]
In [29]:
np.mean(bin_losses), np.std(bin_losses), 100 * (np.std(bin_losses) / np.mean(bin_losses))  # mean, std, coefficient of variation (%)
Out[29]:
(0.918581385161081, 0.0036946303692452147, 0.402210455048284)
In [32]:
# 0.8804 is presumably the mean spline RMSE from a run of the spline_losses cells above.
100 * (0.880369256648293 / np.mean(bin_losses) - 1)
Out[32]:
-4.159906692000637
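Taking the constant above as the mean RMSE of the 20 spline runs, the spline-based FM comes out roughly 4.2% better than the binning baseline (about 0.880 vs. 0.919 in standardized target units).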