In [1]:
import sys

sys.path.insert(0, "../utils")
In [27]:
from sklearn.preprocessing import QuantileTransformer, KBinsDiscretizer, OrdinalEncoder, LabelEncoder
import numpy as np
import pandas as pd
from transformation import BSplineTransformer, spline_transform_dataset
from trainers import FFMTrainer
import optuna
import optuna.samplers
from typing import Callable, Optional
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset
from tqdm import trange
In [3]:
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)
cuda:0
In [4]:
torch.manual_seed(42)
np.random.seed(42)
In [5]:
raw_df = pd.read_csv("../data/adult-all.txt",
                      names=["Age", "Workclass", "fnlwgt", "Education", "Education-Num", "Marital Status",
                             "Occupation", "Relationship", "Race", "Sex", "Capital Gain", "Capital Loss",
                             "Hours per week", "Country", "Target"],
                      dtype={0: int, 1: str, 2: int, 3: str, 4: int, 5: str, 6: str,
                             7: str, 8: str, 9: str, 10: int, 11: int, 12: int, 13: str, 14: str},
                      na_values="?")
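This is the UCI Adult (census income) dataset. na_values="?" turns the ? placeholders used in the raw file into proper NaNs, and Target holds the binary <=50K / >50K income label.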
In [6]:
raw_df.sample(20)
Out[6]:
Age Workclass fnlwgt Education Education-Num Marital Status Occupation Relationship Race Sex Capital Gain Capital Loss Hours per week Country Target
7762 18 Private 423024 HS-grad 9 Never-married Other-service Not-in-family White Male 0 0 20 United-States <=50K
23881 17 Private 178953 12th 8 Never-married Sales Own-child White Female 0 0 20 United-States <=50K
30507 25 Local-gov 348986 HS-grad 9 Never-married Handlers-cleaners Other-relative Black Male 0 0 40 United-States <=50K
28911 20 Private 218215 Some-college 10 Never-married Sales Own-child White Female 0 0 30 United-States <=50K
19484 47 Private 244025 HS-grad 9 Never-married Machine-op-inspct Unmarried Amer-Indian-Eskimo Male 0 0 56 Puerto-Rico <=50K
43031 33 Private 399531 Bachelors 13 Married-civ-spouse Craft-repair Husband Black Male 0 0 40 United-States <=50K
28188 38 Private 200220 HS-grad 9 Married-civ-spouse Craft-repair Husband White Male 0 0 40 United-States <=50K
12761 21 Private 329530 11th 7 Married-civ-spouse Craft-repair Husband White Male 0 0 40 Mexico <=50K
40834 43 Private 282155 Assoc-acdm 12 Divorced Prof-specialty Not-in-family White Female 4650 0 40 United-States <=50K
27875 55 Private 202220 HS-grad 9 Married-civ-spouse Other-service Wife Black Female 2407 0 35 United-States <=50K
1276 46 Private 129007 Bachelors 13 Married-civ-spouse Sales Husband White Male 0 1977 40 United-States >50K
22608 34 Private 261799 Assoc-voc 11 Married-civ-spouse Adm-clerical Husband Black Male 0 0 45 United-States >50K
36230 40 NaN 246862 Bachelors 13 Widowed NaN Not-in-family White Female 0 0 8 United-States <=50K
13398 50 Private 173754 HS-grad 9 Married-civ-spouse Craft-repair Husband White Male 0 0 40 United-States <=50K
43536 29 Private 176727 Some-college 10 Never-married Craft-repair Not-in-family White Male 0 0 38 United-States <=50K
18627 62 Private 24515 9th 5 Married-civ-spouse Exec-managerial Husband White Male 0 0 40 United-States <=50K
38424 56 Private 158776 11th 7 Married-civ-spouse Sales Husband White Male 0 0 55 United-States <=50K
35505 40 Federal-gov 90737 Some-college 10 Married-civ-spouse Adm-clerical Husband White Male 0 1887 40 United-States >50K
2372 74 NaN 340939 9th 5 Married-civ-spouse NaN Husband White Male 3471 0 40 United-States <=50K
3375 21 Private 305874 Some-college 10 Married-civ-spouse Craft-repair Husband White Male 0 0 54 United-States <=50K
In [7]:
raw_df.columns
Out[7]:
Index(['Age', 'Workclass', 'fnlwgt', 'Education', 'Education-Num',
       'Marital Status', 'Occupation', 'Relationship', 'Race', 'Sex',
       'Capital Gain', 'Capital Loss', 'Hours per week', 'Country', 'Target'],
      dtype='object')
In [8]:
categorical_columns = ['Workclass', 'Education', 'Marital Status', 'Occupation', 'Relationship', 'Race', 'Sex', 'Country']
numerical_columns = ['Age', 'fnlwgt', 'Education-Num', 'Capital Gain', 'Capital Loss', 'Hours per week']
In [9]:
na_dict = {col: f'NA_{col}' for col in categorical_columns}
cat_ordinal = raw_df.fillna(na_dict)
cat_ordinal[categorical_columns] = OrdinalEncoder().fit_transform(cat_ordinal[categorical_columns])
cat_ordinal["Target"] = LabelEncoder().fit_transform(cat_ordinal["Target"])
In [10]:
train, test = train_test_split(cat_ordinal, test_size=0.2, random_state=42)
In [11]:
tr_cat = train[categorical_columns]
tr_num = train[numerical_columns]
tr_target = train["Target"]

te_cat = test[categorical_columns]
te_num = test[numerical_columns]
te_target = test["Target"]
In [12]:
tr_num_qs = []
te_num_qs = []
special_values = dict()
for col_idx, col in enumerate(tr_num.columns):
    tr_col = tr_num.iloc[:, col_idx].to_numpy().astype(np.float32)
    te_col = te_num.iloc[:, col_idx].to_numpy().astype(np.float32)

    if col in ['Capital Loss', 'Capital Gain']:
        # These columns are mostly zeros: route the zeros to a special value (-1)
        # and quantile-transform only the strictly positive entries.
        regular_tr_mask = tr_col > 0
        regular_te_mask = te_col > 0
        tr_col[~regular_tr_mask] = -1.
        te_col[~regular_te_mask] = -1.
        special_values[col_idx] = [-1.]
    else:
        regular_tr_mask = np.ones_like(tr_col, dtype=bool)
        regular_te_mask = np.ones_like(te_col, dtype=bool)

    # Fit on the training rows only, then apply the same mapping to the test rows.
    transformer = QuantileTransformer(subsample=np.sum(regular_tr_mask), output_distribution='uniform')
    tr_col[regular_tr_mask] = transformer.fit_transform(tr_col[regular_tr_mask].reshape(-1, 1)).reshape(-1)
    te_col[regular_te_mask] = transformer.transform(te_col[regular_te_mask].reshape(-1, 1)).reshape(-1)

    tr_num_qs.append(tr_col)
    te_num_qs.append(te_col)

tr_num_qs = np.stack(tr_num_qs, axis=1)
te_num_qs = np.stack(te_num_qs, axis=1)
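Each numeric column is mapped towards a uniform [0, 1] distribution by a QuantileTransformer fit on the training rows only. Capital Gain and Capital Loss are almost entirely zeros, so the zeros are replaced with the sentinel -1 and excluded from the quantile fit; special_values records this per column index, presumably so that spline_transform_dataset can give the sentinel a dedicated embedding instead of a spline encoding.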
In [13]:
num_cat_fields = tr_cat.shape[1]
cat_offsets = np.cumsum([0] + [cat_ordinal[col].nunique() for col in categorical_columns])
num_cat_embeddings = cat_offsets[-1]
cat_offsets = cat_offsets[:-1]

tr_cat_indices = tr_cat.values + np.tile(cat_offsets, (len(tr_cat), 1))
tr_cat_weights = np.ones_like(tr_cat_indices, dtype=np.float32)
tr_cat_offsets = np.tile(np.arange(num_cat_fields, dtype=np.int32), (tr_cat.shape[0], 1))
tr_cat_fields = tr_cat_offsets

te_cat_indices = te_cat.values + np.tile(cat_offsets, (len(te_cat), 1))
te_cat_weights = np.ones_like(te_cat_indices, dtype=np.float32)
te_cat_offsets = np.tile(np.arange(num_cat_fields, dtype=np.int32), (te_cat.shape[0], 1))
te_cat_fields = te_cat_offsets
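All categorical fields share one embedding table, so cat_offsets shifts each column's ordinal codes into a disjoint index range; the *_offsets and *_fields arrays record, per token, which bag and which FFM field it belongs to (one token per field here, so both are just the field number). For illustration, with made-up cardinalities of 9, 16 and 7:

    np.cumsum([0, 9, 16, 7])  # array([ 0,  9, 25, 32]): fields own [0, 9), [9, 25), [25, 32)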
In [14]:
def train_spline_ffm(embedding_dim: int, step_size: float, batch_size: int, num_knots: int, num_epochs: int,
                     callback: Optional[Callable[[int, float], None]] = None):
    bs = BSplineTransformer(num_knots, 3)
    tr_num_indices, tr_num_weights, tr_num_offsets, tr_num_fields = spline_transform_dataset(tr_num_qs, bs, special_values=special_values)
    te_num_indices, te_num_weights, te_num_offsets, te_num_fields = spline_transform_dataset(te_num_qs, bs, special_values=special_values)

    num_numerical_fields = tr_num_qs.shape[1]
    num_numerical_embeddings = int(max(np.max(tr_num_indices), np.max(te_num_indices)) + 1)

    num_fields = num_numerical_fields + num_cat_fields
    num_embeddings = num_numerical_embeddings + num_cat_embeddings

    tr_indices = np.concatenate([tr_cat_indices, tr_num_indices + num_cat_embeddings], axis=1)
    tr_weights = np.concatenate([tr_cat_weights, tr_num_weights], axis=1)
    tr_offsets = np.concatenate([tr_cat_offsets, tr_num_offsets + num_cat_fields], axis=1)
    tr_fields = np.concatenate([tr_cat_fields, tr_num_fields + num_cat_fields], axis=1)

    te_indices = np.concatenate([te_cat_indices, te_num_indices + num_cat_embeddings], axis=1)
    te_weights = np.concatenate([te_cat_weights, te_num_weights], axis=1)
    te_offsets = np.concatenate([te_cat_offsets, te_num_offsets + num_cat_fields], axis=1)
    te_fields = np.concatenate([te_cat_fields, te_num_fields + num_cat_fields], axis=1)

    train_ds = TensorDataset(
        torch.tensor(tr_indices, dtype=torch.int64),
        torch.tensor(tr_weights, dtype=torch.float32),
        torch.tensor(tr_offsets, dtype=torch.int64),
        torch.tensor(tr_fields, dtype=torch.int64),
        torch.tensor(tr_target.values, dtype=torch.float32))

    test_ds = TensorDataset(
        torch.tensor(te_indices, dtype=torch.int64),
        torch.tensor(te_weights, dtype=torch.float32),
        torch.tensor(te_offsets, dtype=torch.int64),
        torch.tensor(te_fields, dtype=torch.int64),
        torch.tensor(te_target.values, dtype=torch.float32))


    trainer = FFMTrainer(embedding_dim, step_size, batch_size, num_epochs, callback)
    return trainer.train(num_fields, num_embeddings, train_ds, test_ds, torch.nn.BCEWithLogitsLoss(), device)
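BSplineTransformer and spline_transform_dataset come from the project-local transformation module, so their internals are not shown here. A minimal sketch of the idea for a single feature, assuming the transformer's second argument is the spline degree and that it places a clamped, uniform knot vector on [0, 1] (SciPy >= 1.8 for BSpline.design_matrix):

    import numpy as np
    from scipy.interpolate import BSpline

    def spline_weights(x, num_knots, degree=3):
        # Clamped uniform knot vector on [0, 1]: repeat each endpoint degree extra times.
        inner = np.linspace(0.0, 1.0, num_knots)
        t = np.concatenate([[0.0] * degree, inner, [1.0] * degree])
        # Rows are samples, columns are basis functions; each row has at most
        # degree + 1 non-zero entries, and they sum to 1 (partition of unity).
        return BSpline.design_matrix(x, t, degree).toarray()

    w = spline_weights(np.array([0.1, 0.5, 0.9]), num_knots=5)
    w.shape          # (3, 7): len(t) - degree - 1 basis functions
    w.sum(axis=1)    # array([1., 1., 1.])

Each row's non-zero (column, weight) pairs become that field's embedding lookups, so nearby values share most of their active embeddings and the learned response is piecewise-polynomial rather than piecewise-constant.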
In [15]:
def train_spline_objective(trial: optuna.Trial):
    embedding_dim = trial.suggest_int('embedding_dim', 1, 10)
    step_size = trial.suggest_float('step_size', 1e-2, 0.5, log=True)
    batch_size = trial.suggest_int('batch_size', 2, 32)
    num_knots = trial.suggest_int('num_knots', 3, 48)
    num_epochs = trial.suggest_int('num_epochs', 5, 15)

    def callback(epoch: int, loss: float):
        trial.report(loss, epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return train_spline_ffm(embedding_dim, step_size, batch_size, num_knots, num_epochs,
                           callback=callback)
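trial.report hands the per-epoch test loss to Optuna, and trial.should_prune asks the study's pruner (the default MedianPruner, since none is passed to create_study below) whether to abandon the trial early; this is why so many trials in the log end with "pruned".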
In [16]:
study = optuna.create_study(study_name='splines',
                            direction='minimize',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(train_spline_objective, n_trials=100)
[I 2023-05-16 19:04:42,736] A new study created in memory with name: splines
[I 2023-05-16 19:06:55,632] Trial 0 finished with value: 0.3308318555355072 and parameters: {'embedding_dim': 4, 'step_size': 0.4123206532618726, 'batch_size': 24, 'num_knots': 30, 'num_epochs': 6}. Best is trial 0 with value: 0.3308318555355072.
[I 2023-05-16 19:11:10,824] Trial 1 finished with value: 0.2987254559993744 and parameters: {'embedding_dim': 2, 'step_size': 0.012551115172973842, 'batch_size': 28, 'num_knots': 30, 'num_epochs': 12}. Best is trial 1 with value: 0.2987254559993744.
[I 2023-05-16 19:13:41,166] Trial 2 finished with value: 0.308167040348053 and parameters: {'embedding_dim': 1, 'step_size': 0.44447541666908114, 'batch_size': 27, 'num_knots': 12, 'num_epochs': 7}. Best is trial 1 with value: 0.2987254559993744.
[I 2023-05-16 19:18:26,030] Trial 3 finished with value: 0.2933942675590515 and parameters: {'embedding_dim': 2, 'step_size': 0.0328774741399112, 'batch_size': 18, 'num_knots': 22, 'num_epochs': 8}. Best is trial 3 with value: 0.2933942675590515.
[I 2023-05-16 19:26:54,210] Trial 4 finished with value: 0.2895911633968353 and parameters: {'embedding_dim': 7, 'step_size': 0.017258215396625, 'batch_size': 11, 'num_knots': 19, 'num_epochs': 10}. Best is trial 4 with value: 0.2895911633968353.
[I 2023-05-16 19:29:16,986] Trial 5 finished with value: 0.28712841868400574 and parameters: {'embedding_dim': 8, 'step_size': 0.021839352923182977, 'batch_size': 17, 'num_knots': 30, 'num_epochs': 5}. Best is trial 5 with value: 0.28712841868400574.
[I 2023-05-16 20:01:38,830] Trial 6 finished with value: 0.28596022725105286 and parameters: {'embedding_dim': 7, 'step_size': 0.019485671251272575, 'batch_size': 4, 'num_knots': 46, 'num_epochs': 15}. Best is trial 6 with value: 0.28596022725105286.
[I 2023-05-16 20:18:48,998] Trial 7 finished with value: 0.29043394327163696 and parameters: {'embedding_dim': 9, 'step_size': 0.032925293631105246, 'batch_size': 5, 'num_knots': 34, 'num_epochs': 9}. Best is trial 6 with value: 0.28596022725105286.
[I 2023-05-16 20:41:45,796] Trial 8 finished with value: 0.28961437940597534 and parameters: {'embedding_dim': 2, 'step_size': 0.06938901412739397, 'batch_size': 3, 'num_knots': 44, 'num_epochs': 7}. Best is trial 6 with value: 0.28596022725105286.
[I 2023-05-16 20:42:23,897] Trial 9 pruned. 
[I 2023-05-16 20:43:21,803] Trial 10 pruned. 
[I 2023-05-16 20:54:52,689] Trial 11 pruned. 
[I 2023-05-16 20:55:24,798] Trial 12 pruned. 
[I 2023-05-16 21:00:02,801] Trial 13 pruned. 
[I 2023-05-16 21:01:24,320] Trial 14 pruned. 
[I 2023-05-16 21:08:49,223] Trial 15 pruned. 
[I 2023-05-16 21:42:43,899] Trial 16 pruned. 
[I 2023-05-16 21:46:03,522] Trial 17 finished with value: 0.28969407081604004 and parameters: {'embedding_dim': 6, 'step_size': 0.021721521376611954, 'batch_size': 14, 'num_knots': 24, 'num_epochs': 5}. Best is trial 6 with value: 0.28596022725105286.
[I 2023-05-16 21:50:04,614] Trial 18 finished with value: 0.28562605381011963 and parameters: {'embedding_dim': 9, 'step_size': 0.01404915501132402, 'batch_size': 21, 'num_knots': 34, 'num_epochs': 9}. Best is trial 18 with value: 0.28562605381011963.
[I 2023-05-16 21:50:30,101] Trial 19 pruned. 
[I 2023-05-16 21:51:59,745] Trial 20 pruned. 
[I 2023-05-16 21:56:23,019] Trial 21 finished with value: 0.28827688097953796 and parameters: {'embedding_dim': 8, 'step_size': 0.022702957394650548, 'batch_size': 21, 'num_knots': 34, 'num_epochs': 10}. Best is trial 18 with value: 0.28562605381011963.
[I 2023-05-16 21:56:45,753] Trial 22 pruned. 
[I 2023-05-16 21:57:18,215] Trial 23 pruned. 
[I 2023-05-16 21:57:30,485] Trial 24 pruned. 
[I 2023-05-16 22:01:21,461] Trial 25 finished with value: 0.28463757038116455 and parameters: {'embedding_dim': 10, 'step_size': 0.015507328009848734, 'batch_size': 19, 'num_knots': 42, 'num_epochs': 8}. Best is trial 25 with value: 0.28463757038116455.
[I 2023-05-16 22:01:50,284] Trial 26 pruned. 
[I 2023-05-16 22:08:48,600] Trial 27 finished with value: 0.28615275025367737 and parameters: {'embedding_dim': 9, 'step_size': 0.014103190106531065, 'batch_size': 13, 'num_knots': 48, 'num_epochs': 10}. Best is trial 25 with value: 0.28463757038116455.
[I 2023-05-16 22:12:45,217] Trial 28 finished with value: 0.2895828187465668 and parameters: {'embedding_dim': 10, 'step_size': 0.02582821902928931, 'batch_size': 24, 'num_knots': 37, 'num_epochs': 11}. Best is trial 25 with value: 0.28463757038116455.
[I 2023-05-16 22:15:06,625] Trial 29 pruned. 
[I 2023-05-16 22:17:01,477] Trial 30 pruned. 
[I 2023-05-16 22:17:42,523] Trial 31 pruned. 
[I 2023-05-16 22:25:47,914] Trial 32 finished with value: 0.28570377826690674 and parameters: {'embedding_dim': 10, 'step_size': 0.016148405038761453, 'batch_size': 10, 'num_knots': 41, 'num_epochs': 9}. Best is trial 25 with value: 0.28463757038116455.
[I 2023-05-16 22:34:32,024] Trial 33 finished with value: 0.28472790122032166 and parameters: {'embedding_dim': 10, 'step_size': 0.017898973177799225, 'batch_size': 10, 'num_knots': 41, 'num_epochs': 9}. Best is trial 25 with value: 0.28463757038116455.
[I 2023-05-16 22:35:27,217] Trial 34 pruned. 
[I 2023-05-16 22:45:38,667] Trial 35 finished with value: 0.2841481864452362 and parameters: {'embedding_dim': 10, 'step_size': 0.01628886658284197, 'batch_size': 7, 'num_knots': 41, 'num_epochs': 8}. Best is trial 35 with value: 0.2841481864452362.
[I 2023-05-16 22:49:26,856] Trial 36 pruned. 
[I 2023-05-16 22:49:50,087] Trial 37 pruned. 
[I 2023-05-16 22:50:09,745] Trial 38 pruned. 
[I 2023-05-16 22:54:31,498] Trial 39 finished with value: 0.2877384126186371 and parameters: {'embedding_dim': 10, 'step_size': 0.029506218527871258, 'batch_size': 17, 'num_knots': 36, 'num_epochs': 8}. Best is trial 35 with value: 0.2841481864452362.
[I 2023-05-16 22:56:01,040] Trial 40 pruned. 
[I 2023-05-16 22:56:55,935] Trial 41 pruned. 
[I 2023-05-16 23:07:52,600] Trial 42 finished with value: 0.284860223531723 and parameters: {'embedding_dim': 10, 'step_size': 0.015781427803654453, 'batch_size': 8, 'num_knots': 44, 'num_epochs': 10}. Best is trial 35 with value: 0.2841481864452362.
[I 2023-05-16 23:10:03,298] Trial 43 pruned. 
[I 2023-05-16 23:12:15,603] Trial 44 pruned. 
[I 2023-05-16 23:12:44,465] Trial 45 pruned. 
[I 2023-05-16 23:13:28,673] Trial 46 pruned. 
[I 2023-05-16 23:35:02,584] Trial 47 finished with value: 0.2866511940956116 and parameters: {'embedding_dim': 9, 'step_size': 0.019474627680383216, 'batch_size': 4, 'num_knots': 46, 'num_epochs': 10}. Best is trial 35 with value: 0.2841481864452362.
[I 2023-05-16 23:35:35,885] Trial 48 pruned. 
[I 2023-05-16 23:48:37,070] Trial 49 finished with value: 0.285453736782074 and parameters: {'embedding_dim': 10, 'step_size': 0.016255622278133826, 'batch_size': 8, 'num_knots': 38, 'num_epochs': 12}. Best is trial 35 with value: 0.2841481864452362.
[I 2023-05-16 23:51:53,340] Trial 50 pruned. 
[I 2023-05-16 23:53:20,770] Trial 51 pruned. 
[I 2023-05-17 00:45:03,902] Trial 52 finished with value: 0.28706908226013184 and parameters: {'embedding_dim': 10, 'step_size': 0.018306112716872682, 'batch_size': 2, 'num_knots': 38, 'num_epochs': 12}. Best is trial 35 with value: 0.2841481864452362.
[I 2023-05-17 00:45:53,635] Trial 53 pruned. 
[I 2023-05-17 00:58:42,623] Trial 54 finished with value: 0.28566741943359375 and parameters: {'embedding_dim': 10, 'step_size': 0.02147460300997374, 'batch_size': 7, 'num_knots': 38, 'num_epochs': 10}. Best is trial 35 with value: 0.2841481864452362.
[I 2023-05-17 00:59:38,518] Trial 55 pruned. 
[I 2023-05-17 01:01:27,214] Trial 56 pruned. 
[I 2023-05-17 01:05:33,988] Trial 57 finished with value: 0.28643038868904114 and parameters: {'embedding_dim': 9, 'step_size': 0.02361589004001423, 'batch_size': 18, 'num_knots': 46, 'num_epochs': 8}. Best is trial 35 with value: 0.2841481864452362.
[I 2023-05-17 01:06:10,450] Trial 58 pruned. 
[I 2023-05-17 01:07:03,684] Trial 59 pruned. 
[I 2023-05-17 01:07:28,041] Trial 60 pruned. 
[I 2023-05-17 01:09:59,660] Trial 61 pruned. 
[I 2023-05-17 01:11:06,941] Trial 62 pruned. 
[I 2023-05-17 01:12:28,059] Trial 63 pruned. 
[I 2023-05-17 01:14:42,563] Trial 64 pruned. 
[I 2023-05-17 01:15:20,984] Trial 65 pruned. 
[I 2023-05-17 01:15:40,772] Trial 66 pruned. 
[I 2023-05-17 01:16:32,698] Trial 67 pruned. 
[I 2023-05-17 01:17:15,265] Trial 68 pruned. 
[I 2023-05-17 01:17:47,724] Trial 69 pruned. 
[I 2023-05-17 01:18:59,351] Trial 70 pruned. 
[I 2023-05-17 01:20:21,071] Trial 71 pruned. 
[I 2023-05-17 01:21:05,797] Trial 72 pruned. 
[I 2023-05-17 01:21:44,119] Trial 73 pruned. 
[I 2023-05-17 01:22:41,889] Trial 74 pruned. 
[I 2023-05-17 01:23:15,626] Trial 75 pruned. 
[I 2023-05-17 01:23:56,832] Trial 76 pruned. 
[I 2023-05-17 01:25:02,969] Trial 77 pruned. 
[I 2023-05-17 01:25:39,164] Trial 78 pruned. 
[I 2023-05-17 01:26:04,023] Trial 79 pruned. 
[I 2023-05-17 01:26:22,837] Trial 80 pruned. 
[I 2023-05-17 01:28:40,401] Trial 81 pruned. 
[I 2023-05-17 01:30:21,125] Trial 82 pruned. 
[I 2023-05-17 01:31:13,173] Trial 83 pruned. 
[I 2023-05-17 01:33:48,348] Trial 84 pruned. 
[I 2023-05-17 01:48:24,829] Trial 85 finished with value: 0.2861608564853668 and parameters: {'embedding_dim': 10, 'step_size': 0.017533967555932624, 'batch_size': 6, 'num_knots': 44, 'num_epochs': 13}. Best is trial 35 with value: 0.2841481864452362.
[I 2023-05-17 01:48:46,518] Trial 86 pruned. 
[I 2023-05-17 01:49:08,314] Trial 87 pruned. 
[I 2023-05-17 01:55:55,865] Trial 88 finished with value: 0.2855015993118286 and parameters: {'embedding_dim': 10, 'step_size': 0.016438184664674376, 'batch_size': 14, 'num_knots': 39, 'num_epochs': 14}. Best is trial 35 with value: 0.2841481864452362.
[I 2023-05-17 01:56:24,886] Trial 89 pruned. 
[I 2023-05-17 01:56:51,441] Trial 90 pruned. 
[I 2023-05-17 01:58:58,730] Trial 91 pruned. 
[I 2023-05-17 01:59:41,106] Trial 92 pruned. 
[I 2023-05-17 02:00:32,523] Trial 93 pruned. 
[I 2023-05-17 02:01:10,092] Trial 94 pruned. 
[I 2023-05-17 02:01:40,310] Trial 95 pruned. 
[I 2023-05-17 02:19:21,485] Trial 96 pruned. 
[I 2023-05-17 02:19:51,793] Trial 97 pruned. 
[I 2023-05-17 02:21:00,049] Trial 98 pruned. 
[I 2023-05-17 02:22:32,640] Trial 99 pruned. 
In [17]:
trial = study.best_trial

print(f"Test loss: {trial.value}")
print(f"Best hyperparameters: {trial.params}")
Test loss: 0.2841481864452362
Best hyperparameters: {'embedding_dim': 10, 'step_size': 0.01628886658284197, 'batch_size': 7, 'num_knots': 41, 'num_epochs': 8}
In [18]:
study.best_params
Out[18]:
{'embedding_dim': 10,
 'step_size': 0.01628886658284197,
 'batch_size': 7,
 'num_knots': 41,
 'num_epochs': 8}
In [19]:
train_spline_ffm(**study.best_params)
Out[19]:
0.28711041808128357
In [28]:
spline_losses = []
for _ in trange(20):
    loss = train_spline_ffm(**study.best_params)
    spline_losses.append(loss)
100%|██████████| 20/20 [2:32:39<00:00, 457.99s/it]  
In [29]:
spline_losses
Out[29]:
[0.28441962599754333,
 0.28561073541641235,
 0.28526991605758667,
 0.2852311432361603,
 0.2871018052101135,
 0.28534361720085144,
 0.2867548167705536,
 0.2859247922897339,
 0.28690609335899353,
 0.28653082251548767,
 0.28614985942840576,
 0.2878064811229706,
 0.286538690328598,
 0.2865554690361023,
 0.28602224588394165,
 0.2861751914024353,
 0.2853567898273468,
 0.28668129444122314,
 0.2849292755126953,
 0.2857675850391388]
In [33]:
np.mean(spline_losses), np.std(spline_losses)
Out[33]:
(0.2860538125038147, 0.0008072314680490749)
In [20]:
def train_bin_ffm(embedding_dim: int, step_size: float, batch_size: int,
                  num_bins: int, bin_strategy: str, num_epochs: int,
                  callback: Optional[Callable[[int, float], None]] = None):
    num_numerical_fields = tr_num.shape[1]
    num_numerical_embeddings = num_numerical_fields * num_bins
    # Each numeric field owns a contiguous block of num_bins embedding indices.
    numerical_offsets = np.arange(0, num_numerical_fields) * num_bins

    discretizer = KBinsDiscretizer(num_bins, encode='ordinal', strategy=bin_strategy, random_state=42)
    discretizer.fit(tr_num)

    tr_num_indices = discretizer.transform(tr_num)
    tr_num_indices += np.tile(numerical_offsets, (tr_num.shape[0], 1))
    tr_num_weights = np.ones_like(tr_num_indices)
    tr_num_fields = np.tile(np.arange(0, num_numerical_fields), (tr_num.shape[0], 1))
    tr_num_offsets = tr_num_fields.copy()

    te_num_indices = discretizer.transform(te_num)
    te_num_indices += np.tile(numerical_offsets, (te_num.shape[0], 1))
    te_num_weights = np.ones_like(te_num_indices)
    te_num_fields = np.tile(np.arange(0, num_numerical_fields), (te_num.shape[0], 1))
    te_num_offsets = te_num_fields.copy()


    num_fields = num_numerical_fields + num_cat_fields
    num_embeddings = num_numerical_embeddings + num_cat_embeddings

    tr_indices = np.concatenate([tr_cat_indices, tr_num_indices + num_cat_embeddings], axis=1)
    tr_weights = np.concatenate([tr_cat_weights, tr_num_weights], axis=1)
    tr_offsets = np.concatenate([tr_cat_offsets, tr_num_offsets + num_cat_fields], axis=1)
    tr_fields = np.concatenate([tr_cat_fields, tr_num_fields + num_cat_fields], axis=1)

    te_indices = np.concatenate([te_cat_indices, te_num_indices + num_cat_embeddings], axis=1)
    te_weights = np.concatenate([te_cat_weights, te_num_weights], axis=1)
    te_offsets = np.concatenate([te_cat_offsets, te_num_offsets + num_cat_fields], axis=1)
    te_fields = np.concatenate([te_cat_fields, te_num_fields + num_cat_fields], axis=1)

    train_ds = TensorDataset(
        torch.tensor(tr_indices, dtype=torch.int64),
        torch.tensor(tr_weights, dtype=torch.float32),
        torch.tensor(tr_offsets, dtype=torch.int64),
        torch.tensor(tr_fields, dtype=torch.int64),
        torch.tensor(tr_target.values, dtype=torch.float32))

    test_ds = TensorDataset(
        torch.tensor(te_indices, dtype=torch.int64),
        torch.tensor(te_weights, dtype=torch.float32),
        torch.tensor(te_offsets, dtype=torch.int64),
        torch.tensor(te_fields, dtype=torch.int64),
        torch.tensor(te_target.values, dtype=torch.float32))

    trainer = FFMTrainer(embedding_dim, step_size, batch_size, num_epochs, callback)
    return trainer.train(num_fields, num_embeddings, train_ds, test_ds, torch.nn.BCEWithLogitsLoss(), device)
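Structurally this mirrors train_spline_ffm, but each numeric value now activates exactly one bin embedding with weight 1.0, so the model's response to a numeric feature is piecewise-constant across bins, whereas the spline encoding spreads each value over up to degree + 1 adjacent embeddings with fractional weights. Note also that the bins are fit on the raw numeric columns, so the quantile/uniform strategy choice here plays the role the QuantileTransformer played for the splines.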
In [21]:
def train_bin_objective(trial: optuna.Trial):
    embedding_dim = trial.suggest_int('embedding_dim', 1, 10)
    step_size = trial.suggest_float('step_size', 1e-2, 0.5, log=True)
    batch_size = trial.suggest_int('batch_size', 2, 32)
    num_bins = trial.suggest_int('num_bins', 2, 100)
    bin_strategy = trial.suggest_categorical('bin_strategy', ['uniform', 'quantile'])
    num_epochs = trial.suggest_int('num_epochs', 5, 15)

    def callback(epoch: int, loss: float):
        trial.report(loss, epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return train_bin_ffm(embedding_dim, step_size, batch_size, num_bins, bin_strategy, num_epochs,
                         callback=callback)
In [22]:
study_bins = optuna.create_study(study_name='bins',
                                 direction='minimize',
                                 sampler=optuna.samplers.TPESampler(seed=42))
study_bins.optimize(train_bin_objective, n_trials=100)
[I 2023-05-17 02:30:20,329] A new study created in memory with name: bins
[I 2023-05-17 02:31:48,083] Trial 0 finished with value: 0.36048880219459534 and parameters: {'embedding_dim': 4, 'step_size': 0.4123206532618726, 'batch_size': 24, 'num_bins': 61, 'bin_strategy': 'uniform', 'num_epochs': 5}. Best is trial 0 with value: 0.36048880219459534.
[I 2023-05-17 02:33:58,528] Trial 1 finished with value: 0.35097000002861023 and parameters: {'embedding_dim': 9, 'step_size': 0.10502105436744279, 'batch_size': 23, 'num_bins': 4, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 1 with value: 0.35097000002861023.
[I 2023-05-17 02:40:40,921] Trial 2 finished with value: 0.3105776607990265 and parameters: {'embedding_dim': 2, 'step_size': 0.020492680115417352, 'batch_size': 11, 'num_bins': 53, 'bin_strategy': 'uniform', 'num_epochs': 11}. Best is trial 2 with value: 0.3105776607990265.
[I 2023-05-17 02:45:58,507] Trial 3 finished with value: 0.30633899569511414 and parameters: {'embedding_dim': 2, 'step_size': 0.03135775732257745, 'batch_size': 13, 'num_bins': 47, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 3 with value: 0.30633899569511414.
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 2 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 3 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 4 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 5 are removed. Consider decreasing the number of bins.
  warnings.warn(
[I 2023-05-17 02:51:06,519] Trial 4 finished with value: 0.32146045565605164 and parameters: {'embedding_dim': 6, 'step_size': 0.011992724522955167, 'batch_size': 20, 'num_bins': 18, 'bin_strategy': 'quantile', 'num_epochs': 15}. Best is trial 3 with value: 0.30633899569511414.
[I 2023-05-17 03:04:29,678] Trial 5 finished with value: 0.3089120388031006 and parameters: {'embedding_dim': 9, 'step_size': 0.032925293631105246, 'batch_size': 5, 'num_bins': 69, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 3 with value: 0.30633899569511414.
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 2 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 3 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 4 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 5 are removed. Consider decreasing the number of bins.
  warnings.warn(
[I 2023-05-17 03:08:36,422] Trial 6 pruned. 
[I 2023-05-17 03:09:57,126] Trial 7 pruned. 
[I 2023-05-17 03:12:11,602] Trial 8 pruned. 
[I 2023-05-17 03:18:03,071] Trial 9 finished with value: 0.3057221472263336 and parameters: {'embedding_dim': 4, 'step_size': 0.030012301808980443, 'batch_size': 18, 'num_bins': 15, 'bin_strategy': 'uniform', 'num_epochs': 15}. Best is trial 9 with value: 0.3057221472263336.
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 3 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 4 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 5 are removed. Consider decreasing the number of bins.
  warnings.warn(
[I 2023-05-17 03:18:16,684] Trial 10 pruned. 
[I 2023-05-17 03:22:39,833] Trial 11 finished with value: 0.30919402837753296 and parameters: {'embedding_dim': 4, 'step_size': 0.04642647300540488, 'batch_size': 14, 'num_bins': 30, 'bin_strategy': 'uniform', 'num_epochs': 9}. Best is trial 9 with value: 0.3057221472263336.
[I 2023-05-17 03:26:09,676] Trial 12 finished with value: 0.3141358494758606 and parameters: {'embedding_dim': 4, 'step_size': 0.08128484055802063, 'batch_size': 16, 'num_bins': 36, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 9 with value: 0.3057221472263336.
[I 2023-05-17 03:26:54,554] Trial 13 pruned. 
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 2 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 3 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 4 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 5 are removed. Consider decreasing the number of bins.
  warnings.warn(
[I 2023-05-17 03:28:24,068] Trial 14 pruned. 
[I 2023-05-17 03:34:47,198] Trial 15 finished with value: 0.3047816753387451 and parameters: {'embedding_dim': 3, 'step_size': 0.044922957549390394, 'batch_size': 14, 'num_bins': 46, 'bin_strategy': 'uniform', 'num_epochs': 13}. Best is trial 15 with value: 0.3047816753387451.
[I 2023-05-17 03:36:42,706] Trial 16 pruned. 
[I 2023-05-17 03:42:06,253] Trial 17 finished with value: 0.32851821184158325 and parameters: {'embedding_dim': 7, 'step_size': 0.051868823187409284, 'batch_size': 17, 'num_bins': 80, 'bin_strategy': 'uniform', 'num_epochs': 13}. Best is trial 15 with value: 0.3047816753387451.
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 2 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 3 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 4 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 5 are removed. Consider decreasing the number of bins.
  warnings.warn(
[I 2023-05-17 03:43:04,642] Trial 18 pruned. 
[I 2023-05-17 03:43:24,763] Trial 19 pruned. 
[I 2023-05-17 03:48:35,377] Trial 20 finished with value: 0.3056541383266449 and parameters: {'embedding_dim': 10, 'step_size': 0.020693482079285105, 'batch_size': 16, 'num_bins': 38, 'bin_strategy': 'uniform', 'num_epochs': 12}. Best is trial 15 with value: 0.3047816753387451.
[I 2023-05-17 03:53:51,363] Trial 21 finished with value: 0.3041217029094696 and parameters: {'embedding_dim': 10, 'step_size': 0.017182589531927382, 'batch_size': 16, 'num_bins': 40, 'bin_strategy': 'uniform', 'num_epochs': 12}. Best is trial 21 with value: 0.3041217029094696.
[I 2023-05-17 03:59:26,777] Trial 22 finished with value: 0.30423784255981445 and parameters: {'embedding_dim': 10, 'step_size': 0.015889555648010128, 'batch_size': 15, 'num_bins': 44, 'bin_strategy': 'uniform', 'num_epochs': 12}. Best is trial 21 with value: 0.3041217029094696.
[I 2023-05-17 03:59:58,972] Trial 23 pruned. 
[I 2023-05-17 04:00:28,524] Trial 24 pruned. 
[I 2023-05-17 04:01:15,985] Trial 25 pruned. 
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 2 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 3 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 4 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 5 are removed. Consider decreasing the number of bins.
  warnings.warn(
[I 2023-05-17 04:01:46,624] Trial 26 pruned. 
[I 2023-05-17 04:08:06,495] Trial 27 finished with value: 0.30776745080947876 and parameters: {'embedding_dim': 10, 'step_size': 0.02266745511267857, 'batch_size': 15, 'num_bins': 74, 'bin_strategy': 'uniform', 'num_epochs': 14}. Best is trial 21 with value: 0.3041217029094696.
[I 2023-05-17 04:11:50,365] Trial 28 finished with value: 0.30118101835250854 and parameters: {'embedding_dim': 8, 'step_size': 0.013689434707013629, 'batch_size': 23, 'num_bins': 56, 'bin_strategy': 'uniform', 'num_epochs': 12}. Best is trial 28 with value: 0.30118101835250854.
[I 2023-05-17 04:14:59,191] Trial 29 finished with value: 0.29582858085632324 and parameters: {'embedding_dim': 8, 'step_size': 0.013772781946733741, 'batch_size': 25, 'num_bins': 58, 'bin_strategy': 'uniform', 'num_epochs': 11}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 04:15:16,142] Trial 30 pruned. 
[I 2023-05-17 04:18:41,663] Trial 31 finished with value: 0.3001174330711365 and parameters: {'embedding_dim': 9, 'step_size': 0.01600497838064572, 'batch_size': 23, 'num_bins': 58, 'bin_strategy': 'uniform', 'num_epochs': 11}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 04:18:59,106] Trial 32 pruned. 
[I 2023-05-17 04:19:10,543] Trial 33 pruned. 
[I 2023-05-17 04:19:27,642] Trial 34 pruned. 
[I 2023-05-17 04:22:01,930] Trial 35 finished with value: 0.30076929926872253 and parameters: {'embedding_dim': 9, 'step_size': 0.017679085410782114, 'batch_size': 28, 'num_bins': 50, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 04:24:15,945] Trial 36 finished with value: 0.3045254051685333 and parameters: {'embedding_dim': 9, 'step_size': 0.024278676446518384, 'batch_size': 29, 'num_bins': 52, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 29 with value: 0.29582858085632324.
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 2 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 3 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 4 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 5 are removed. Consider decreasing the number of bins.
  warnings.warn(
[I 2023-05-17 04:24:30,824] Trial 37 pruned. 
[I 2023-05-17 04:27:24,617] Trial 38 finished with value: 0.3024236261844635 and parameters: {'embedding_dim': 8, 'step_size': 0.018602665611405316, 'batch_size': 25, 'num_bins': 71, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 04:27:48,650] Trial 39 pruned. 
[I 2023-05-17 04:28:26,064] Trial 40 pruned. 
[I 2023-05-17 04:31:18,026] Trial 41 finished with value: 0.2989811897277832 and parameters: {'embedding_dim': 8, 'step_size': 0.016781120570308682, 'batch_size': 25, 'num_bins': 72, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 04:31:32,313] Trial 42 pruned. 
[I 2023-05-17 04:34:19,356] Trial 43 finished with value: 0.3026280105113983 and parameters: {'embedding_dim': 9, 'step_size': 0.025610956679079467, 'batch_size': 26, 'num_bins': 81, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 04:34:52,190] Trial 44 pruned. 
[I 2023-05-17 04:37:51,600] Trial 45 finished with value: 0.30039864778518677 and parameters: {'embedding_dim': 9, 'step_size': 0.013926851210708779, 'batch_size': 24, 'num_bins': 69, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 29 with value: 0.29582858085632324.
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 2 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 3 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 4 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 5 are removed. Consider decreasing the number of bins.
  warnings.warn(
[I 2023-05-17 04:38:09,223] Trial 46 pruned. 
[I 2023-05-17 04:38:25,507] Trial 47 pruned. 
[I 2023-05-17 04:38:39,778] Trial 48 pruned. 
[I 2023-05-17 04:38:56,972] Trial 49 pruned. 
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 2 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 3 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 4 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 5 are removed. Consider decreasing the number of bins.
  warnings.warn(
[I 2023-05-17 04:39:17,432] Trial 50 pruned. 
[I 2023-05-17 04:39:55,277] Trial 51 pruned. 
[I 2023-05-17 04:40:41,806] Trial 52 pruned. 
[I 2023-05-17 04:41:04,478] Trial 53 pruned. 
[I 2023-05-17 04:41:24,107] Trial 54 pruned. 
[I 2023-05-17 04:41:41,254] Trial 55 pruned. 
[I 2023-05-17 04:41:58,017] Trial 56 pruned. 
[I 2023-05-17 04:42:10,630] Trial 57 pruned. 
[I 2023-05-17 04:42:26,297] Trial 58 pruned. 
[I 2023-05-17 04:42:46,767] Trial 59 pruned. 
[I 2023-05-17 04:46:59,616] Trial 60 finished with value: 0.30149054527282715 and parameters: {'embedding_dim': 9, 'step_size': 0.016542823348484607, 'batch_size': 18, 'num_bins': 100, 'bin_strategy': 'uniform', 'num_epochs': 11}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 04:51:16,197] Trial 61 finished with value: 0.298368364572525 and parameters: {'embedding_dim': 9, 'step_size': 0.016604846417793938, 'batch_size': 18, 'num_bins': 92, 'bin_strategy': 'uniform', 'num_epochs': 11}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 04:51:34,113] Trial 62 pruned. 
[I 2023-05-17 04:51:53,620] Trial 63 pruned. 
[I 2023-05-17 05:37:06,903] Trial 64 finished with value: 0.30193400382995605 and parameters: {'embedding_dim': 9, 'step_size': 0.016685807356309056, 'batch_size': 2, 'num_bins': 65, 'bin_strategy': 'uniform', 'num_epochs': 13}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 05:37:25,002] Trial 65 pruned. 
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 2 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 3 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 4 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 5 are removed. Consider decreasing the number of bins.
  warnings.warn(
[I 2023-05-17 05:37:53,258] Trial 66 pruned. 
[I 2023-05-17 05:38:13,413] Trial 67 pruned. 
[I 2023-05-17 05:38:35,738] Trial 68 pruned. 
[I 2023-05-17 05:39:00,269] Trial 69 pruned. 
[I 2023-05-17 05:42:54,379] Trial 70 finished with value: 0.29890763759613037 and parameters: {'embedding_dim': 10, 'step_size': 0.016526218941787352, 'batch_size': 23, 'num_bins': 72, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 05:43:17,916] Trial 71 pruned. 
[I 2023-05-17 05:43:42,518] Trial 72 pruned. 
[I 2023-05-17 05:44:05,204] Trial 73 pruned. 
[I 2023-05-17 05:48:21,735] Trial 74 finished with value: 0.3004951477050781 and parameters: {'embedding_dim': 9, 'step_size': 0.014788329393552348, 'batch_size': 21, 'num_bins': 59, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 05:49:22,809] Trial 75 pruned. 
[I 2023-05-17 05:53:11,710] Trial 76 finished with value: 0.30084165930747986 and parameters: {'embedding_dim': 9, 'step_size': 0.015510325380640089, 'batch_size': 21, 'num_bins': 71, 'bin_strategy': 'uniform', 'num_epochs': 9}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 05:55:50,704] Trial 77 finished with value: 0.3046785593032837 and parameters: {'embedding_dim': 10, 'step_size': 0.024487933212022406, 'batch_size': 26, 'num_bins': 67, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 29 with value: 0.29582858085632324.
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 2 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 3 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 4 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 5 are removed. Consider decreasing the number of bins.
  warnings.warn(
[I 2023-05-17 05:56:15,031] Trial 78 pruned. 
[I 2023-05-17 05:56:32,604] Trial 79 pruned. 
[I 2023-05-17 05:56:52,941] Trial 80 pruned. 
[I 2023-05-17 05:59:25,960] Trial 81 finished with value: 0.29877328872680664 and parameters: {'embedding_dim': 9, 'step_size': 0.015665965782853387, 'batch_size': 21, 'num_bins': 70, 'bin_strategy': 'uniform', 'num_epochs': 8}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 05:59:59,871] Trial 82 pruned. 
[I 2023-05-17 06:00:19,755] Trial 83 pruned. 
[I 2023-05-17 06:00:42,196] Trial 84 pruned. 
[I 2023-05-17 06:01:01,542] Trial 85 pruned. 
[I 2023-05-17 06:03:51,558] Trial 86 finished with value: 0.30616626143455505 and parameters: {'embedding_dim': 10, 'step_size': 0.02268770472091906, 'batch_size': 27, 'num_bins': 67, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 06:04:15,445] Trial 87 pruned. 
[I 2023-05-17 06:04:34,359] Trial 88 pruned. 
[I 2023-05-17 06:08:41,776] Trial 89 finished with value: 0.30008405447006226 and parameters: {'embedding_dim': 9, 'step_size': 0.017192731186566303, 'batch_size': 20, 'num_bins': 83, 'bin_strategy': 'uniform', 'num_epochs': 9}. Best is trial 29 with value: 0.29582858085632324.
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 2 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 3 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 4 are removed. Consider decreasing the number of bins.
  warnings.warn(
/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:291: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 5 are removed. Consider decreasing the number of bins.
  warnings.warn(
[I 2023-05-17 06:09:13,735] Trial 90 pruned. 
[I 2023-05-17 06:09:44,235] Trial 91 pruned. 
[I 2023-05-17 06:10:05,698] Trial 92 pruned. 
[I 2023-05-17 06:11:53,132] Trial 93 pruned. 
[I 2023-05-17 06:12:18,070] Trial 94 pruned. 
[I 2023-05-17 06:16:39,839] Trial 95 finished with value: 0.2962709665298462 and parameters: {'embedding_dim': 10, 'step_size': 0.01851901416896681, 'batch_size': 21, 'num_bins': 97, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 06:19:44,587] Trial 96 finished with value: 0.29874420166015625 and parameters: {'embedding_dim': 10, 'step_size': 0.016062342363613872, 'batch_size': 21, 'num_bins': 100, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 06:22:57,478] Trial 97 finished with value: 0.303432822227478 and parameters: {'embedding_dim': 10, 'step_size': 0.02685173836114753, 'batch_size': 20, 'num_bins': 100, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 06:25:31,353] Trial 98 finished with value: 0.29946252703666687 and parameters: {'embedding_dim': 10, 'step_size': 0.018564930846237025, 'batch_size': 24, 'num_bins': 93, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 29 with value: 0.29582858085632324.
[I 2023-05-17 06:28:52,888] Trial 99 finished with value: 0.29889658093452454 and parameters: {'embedding_dim': 10, 'step_size': 0.019834766929900396, 'batch_size': 19, 'num_bins': 97, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 29 with value: 0.29582858085632324.
In [23]:
study_bins.best_params
Out[23]:
{'embedding_dim': 8,
 'step_size': 0.013772781946733741,
 'batch_size': 25,
 'num_bins': 58,
 'bin_strategy': 'uniform',
 'num_epochs': 11}
In [24]:
trial = study_bins.best_trial

print(f"Test loss: {trial.value}")
print(f"Best hyperparameters: {trial.params}")
Test loss: 0.29582858085632324
Best hyperparameters: {'embedding_dim': 8, 'step_size': 0.013772781946733741, 'batch_size': 25, 'num_bins': 58, 'bin_strategy': 'uniform', 'num_epochs': 11}
In [25]:
train_bin_ffm(**study_bins.best_params)
Out[25]:
0.2982105612754822
In [30]:
bin_losses = []
for _ in trange(20):
    loss = train_bin_ffm(**study_bins.best_params)
    bin_losses.append(loss)
100%|██████████| 20/20 [1:02:58<00:00, 188.92s/it]
In [31]:
bin_losses
Out[31]:
[0.2994536757469177,
 0.3009205758571625,
 0.296610563993454,
 0.2973214089870453,
 0.2992994785308838,
 0.2983883023262024,
 0.29931512475013733,
 0.2996693253517151,
 0.2996058762073517,
 0.2982383966445923,
 0.30039355158805847,
 0.3006632924079895,
 0.29591503739356995,
 0.2998608648777008,
 0.2998100817203522,
 0.2978292405605316,
 0.29889991879463196,
 0.2984970808029175,
 0.29809093475341797,
 0.3017430901527405]
In [32]:
np.mean(bin_losses), np.std(bin_losses)
Out[32]:
(0.2990262910723686, 0.001413742829358982)
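Over 20 retrainings apiece, the spline encoding averages 0.28605 ± 0.00081 test log-loss against 0.29903 ± 0.00141 for binning: the gap of roughly 0.013 is an order of magnitude larger than either run-to-run standard deviation. For a formal check one could run, say, a Welch t-test (not executed above; SciPy assumed available):

    from scipy import stats
    stats.ttest_ind(spline_losses, bin_losses, equal_var=False)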