In [1]:
import sys

sys.path.insert(0, "../utils")
In [28]:
from sklearn.preprocessing import QuantileTransformer, KBinsDiscretizer, StandardScaler
import numpy as np
import pandas as pd
from transformation import BSplineTransformer, spline_transform_dataset
from trainers import FMTrainer
import math
import optuna
import optuna.samplers
from typing import Callable, Optional
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset
from tqdm import trange
In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
cuda:0
In [4]:
torch.manual_seed(42)
np.random.seed(42)
In [5]:
# YearPredictionMSD layout: the Year target, 12 timbre averages (TA01-TA12)
# and 78 timbre covariances (TC01-TC78).
col_names = ['Year'] + [f'TA{i:02d}' for i in range(1, 13)] + [f'TC{i:02d}' for i in range(1, 79)]
raw_df = pd.read_csv("../data/Ye_millionsongdataset/Training_set_songs.csv",
                     names=col_names,
                     dtype={'Year': int, **{c: float for c in col_names[1:]}},
                     na_values="?", skiprows=1)
In [6]:
raw_df.sample(6)
Out[6]:
Year TA01 TA02 TA03 TA04 TA05 TA06 TA07 TA08 TA09 ... TC69 TC70 TC71 TC72 TC73 TC74 TC75 TC76 TC77 TC78
332595 2004 51.19589 18.54077 12.55200 3.25000 -7.79542 -19.63461 0.27662 -1.03994 -1.57231 ... -16.43458 -37.05054 -100.93186 49.26529 -1.47467 -39.25996 -22.44389 -16.37939 -49.67664 3.87491
230573 1989 51.44573 53.42621 35.83483 -16.72867 -48.13185 -13.04248 -45.74081 -6.18791 16.60100 ... 9.94220 -71.82280 59.40250 45.14201 -2.72343 35.62292 37.99939 7.04862 21.08678 2.07779
364530 1987 44.85215 33.51052 27.44631 24.99783 -15.02508 12.29223 10.57365 7.06412 -1.29649 ... 5.05131 56.78609 136.98499 -30.38374 24.51034 62.77834 44.80304 30.34866 105.17991 2.58183
82857 2002 50.25653 59.83236 37.80210 -0.57762 -1.64064 -11.77884 -2.25574 -4.17007 4.05922 ... -4.12555 21.34620 29.72172 56.71419 2.61590 56.47152 -26.05716 -0.77059 29.40943 -0.02311
108108 1971 45.48775 -5.49790 9.56187 9.36977 -11.15726 -3.63341 11.00297 -6.36722 5.37455 ... -29.83405 228.02525 53.68190 -47.38609 14.62809 124.90797 -26.61476 5.08838 295.42035 19.74883
446568 2005 49.59492 35.40110 -8.11273 -13.40502 3.18931 -14.05923 3.04436 -1.43375 3.60354 ... -0.40689 -91.80271 -29.51019 -6.40710 6.99983 81.51023 -83.44656 2.81049 24.80947 -10.32877

6 rows × 91 columns

In [7]:
raw_df.shape
Out[7]:
(463715, 91)
In [8]:
raw_df.columns
Out[8]:
Index(['Year', 'TA01', 'TA02', 'TA03', 'TA04', 'TA05', 'TA06', 'TA07', 'TA08',
       'TA09', 'TA10', 'TA11', 'TA12', 'TC01', 'TC02', 'TC03', 'TC04', 'TC05',
       'TC06', 'TC07', 'TC08', 'TC09', 'TC10', 'TC11', 'TC12', 'TC13', 'TC14',
       'TC15', 'TC16', 'TC17', 'TC18', 'TC19', 'TC20', 'TC21', 'TC22', 'TC23',
       'TC24', 'TC25', 'TC26', 'TC27', 'TC28', 'TC29', 'TC30', 'TC31', 'TC32',
       'TC33', 'TC34', 'TC35', 'TC36', 'TC37', 'TC38', 'TC39', 'TC40', 'TC41',
       'TC42', 'TC43', 'TC44', 'TC45', 'TC46', 'TC47', 'TC48', 'TC49', 'TC50',
       'TC51', 'TC52', 'TC53', 'TC54', 'TC55', 'TC56', 'TC57', 'TC58', 'TC59',
       'TC60', 'TC61', 'TC62', 'TC63', 'TC64', 'TC65', 'TC66', 'TC67', 'TC68',
       'TC69', 'TC70', 'TC71', 'TC72', 'TC73', 'TC74', 'TC75', 'TC76', 'TC77',
       'TC78'],
      dtype='object')
In [9]:
train, test = train_test_split(raw_df, test_size=0.2, random_state=42)
In [10]:
tr_feats = train.drop("Year", axis=1)
tr_target = train["Year"].values
te_feats = test.drop("Year", axis=1)
te_target = test["Year"].values
In [11]:
target_scaler = StandardScaler()
tr_target = target_scaler.fit_transform(tr_target.reshape(-1, 1)).reshape(-1)
te_target = target_scaler.transform(te_target.reshape(-1, 1)).reshape(-1)
In [12]:
quant_transform = QuantileTransformer(output_distribution='uniform',
                                      n_quantiles=10000,
                                      subsample=len(tr_feats),
                                      random_state=42)
X_train_qs = quant_transform.fit_transform(tr_feats)
X_test_qs = quant_transform.transform(te_feats)
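QuantileTransformer maps each feature through its empirical CDF, so the spline basis below sees inputs confined to [0, 1] and approximately uniformly distributed. A quick sanity check on that assumption (illustrative, not part of the recorded run):

# With output_distribution='uniform' every transformed column lies in the
# unit interval; test values outside the training range are clipped.
assert X_train_qs.min() >= 0.0 and X_train_qs.max() <= 1.0
assert X_test_qs.min() >= 0.0 and X_test_qs.max() <= 1.0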
In [13]:
def train_spline_fm(embedding_dim: int, step_size: float, batch_size: int, num_knots: int, num_epochs: int,
                    callback: Optional[Callable[[int, float], None]] = None):
    bs = BSplineTransformer(num_knots, 3)
    tr_indices, tr_weights, tr_offsets, tr_fields = spline_transform_dataset(X_train_qs, bs)
    te_indices, te_weights, te_offsets, te_fields = spline_transform_dataset(X_test_qs, bs)

    num_fields = X_train_qs.shape[1]
    num_embeddings = int(max(np.max(tr_indices), np.max(te_indices)) + 1)

    train_ds = TensorDataset(
        torch.tensor(tr_indices, dtype=torch.int64),
        torch.tensor(tr_weights, dtype=torch.float32),
        torch.tensor(tr_offsets, dtype=torch.int64),
        torch.tensor(tr_fields, dtype=torch.int64),
        torch.tensor(tr_target, dtype=torch.float32))

    test_ds = TensorDataset(
        torch.tensor(te_indices, dtype=torch.int64),
        torch.tensor(te_weights, dtype=torch.float32),
        torch.tensor(te_offsets, dtype=torch.int64),
        torch.tensor(te_fields, dtype=torch.int64),
        torch.tensor(te_target, dtype=torch.float32))


    trainer = FMTrainer(embedding_dim, step_size, batch_size, num_epochs, callback)
    return trainer.train(num_fields, num_embeddings, train_ds, test_ds, torch.nn.MSELoss(), device)
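spline_transform_dataset and BSplineTransformer come from the local utils package, so their internals are not shown here; presumably the second constructor argument is the spline degree. For intuition, SciPy can reproduce the same kind of expansion: a cubic B-spline basis turns one scalar into at most 4 non-zero (index, weight) pairs whose weights sum to 1, which is exactly the sparse format the FM consumes. A minimal sketch with uniform knots on [0, 1]:

from scipy.interpolate import BSpline

k = 3                                    # cubic, matching BSplineTransformer(num_knots, 3)
grid = np.linspace(0, 1, 8)              # illustrative knot grid
t = np.r_[[0.0] * k, grid, [1.0] * k]    # repeat boundary knots to full multiplicity
row = BSpline.design_matrix(np.array([0.4]), t, k)  # sparse 1 x n_basis row
print(row.toarray().round(3))            # at most k+1 = 4 non-zero weights, summing to 1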
In [14]:
def train_spline_objective(trial: optuna.Trial):
    embedding_dim = trial.suggest_int('embedding_dim', 1, 10)
    step_size = trial.suggest_float('step_size', 1e-2, 0.5, log=True)
    batch_size = trial.suggest_int('batch_size', 32, 256)
    num_knots = trial.suggest_int('num_knots', 3, 48)
    num_epochs = trial.suggest_int('num_epochs', 5, 15)

    def callback(epoch: int, loss: float):
        trial.report(math.sqrt(loss), epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return math.sqrt(train_spline_fm(embedding_dim, step_size, batch_size, num_knots, num_epochs,
                                     callback=callback))
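Reporting the per-epoch RMSE through trial.report feeds the study's pruner. No pruner is passed to optuna.create_study below, so Optuna falls back to MedianPruner; the equivalent explicit setup (shown for reference only) would be:

# Prunes a trial whose intermediate RMSE is worse than the median of
# completed trials at the same epoch, after 5 unpruned startup trials.
pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=0, interval_steps=1)
# study = optuna.create_study(..., pruner=pruner)  # same behaviour as the default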
In [15]:
study = optuna.create_study(study_name='splines',
                            direction='minimize',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(train_spline_objective, n_trials=100)
[I 2023-05-16 18:53:23,460] A new study created in memory with name: splines
[I 2023-05-16 18:56:43,426] Trial 0 finished with value: 1.2386136255730438 and parameters: {'embedding_dim': 4, 'step_size': 0.4123206532618726, 'batch_size': 196, 'num_knots': 30, 'num_epochs': 6}. Best is trial 0 with value: 1.2386136255730438.
[I 2023-05-16 19:03:07,293] Trial 1 finished with value: 9.578139736045964 and parameters: {'embedding_dim': 2, 'step_size': 0.012551115172973842, 'batch_size': 226, 'num_knots': 30, 'num_epochs': 12}. Best is trial 0 with value: 1.2386136255730438.
[I 2023-05-16 19:06:49,945] Trial 2 finished with value: 1.4629122193885566 and parameters: {'embedding_dim': 1, 'step_size': 0.44447541666908114, 'batch_size': 219, 'num_knots': 12, 'num_epochs': 7}. Best is trial 0 with value: 1.2386136255730438.
[I 2023-05-16 19:13:15,304] Trial 3 finished with value: 5.51720045029323 and parameters: {'embedding_dim': 2, 'step_size': 0.0328774741399112, 'batch_size': 150, 'num_knots': 22, 'num_epochs': 8}. Best is trial 0 with value: 1.2386136255730438.
[I 2023-05-16 19:23:13,052] Trial 4 finished with value: 2.7195761291430305 and parameters: {'embedding_dim': 7, 'step_size': 0.017258215396625, 'batch_size': 97, 'num_knots': 19, 'num_epochs': 10}. Best is trial 0 with value: 1.2386136255730438.
[I 2023-05-16 19:24:34,830] Trial 5 pruned. 
[I 2023-05-16 19:26:59,399] Trial 6 pruned. 
[I 2023-05-16 19:39:46,136] Trial 7 finished with value: 1.3228858834036774 and parameters: {'embedding_dim': 9, 'step_size': 0.032925293631105246, 'batch_size': 53, 'num_knots': 34, 'num_epochs': 9}. Best is trial 0 with value: 1.2386136255730438.
[I 2023-05-16 19:52:35,118] Trial 8 finished with value: 1.550295967639816 and parameters: {'embedding_dim': 2, 'step_size': 0.06938901412739397, 'batch_size': 39, 'num_knots': 44, 'num_epochs': 7}. Best is trial 0 with value: 1.2386136255730438.
[I 2023-05-16 19:53:51,842] Trial 9 pruned. 
[I 2023-05-16 19:57:07,263] Trial 10 finished with value: 1.1966288679541184 and parameters: {'embedding_dim': 4, 'step_size': 0.38450727047302585, 'batch_size': 188, 'num_knots': 3, 'num_epochs': 5}. Best is trial 10 with value: 1.1966288679541184.
[I 2023-05-16 20:00:16,413] Trial 11 finished with value: 1.166368747320352 and parameters: {'embedding_dim': 4, 'step_size': 0.45819653342095856, 'batch_size': 192, 'num_knots': 6, 'num_epochs': 5}. Best is trial 11 with value: 1.166368747320352.
[I 2023-05-16 20:03:34,162] Trial 12 finished with value: 1.2247194669600276 and parameters: {'embedding_dim': 4, 'step_size': 0.20992321291982594, 'batch_size': 184, 'num_knots': 4, 'num_epochs': 5}. Best is trial 11 with value: 1.166368747320352.
[I 2023-05-16 20:09:21,405] Trial 13 finished with value: 1.0230244392652823 and parameters: {'embedding_dim': 5, 'step_size': 0.25231843076021476, 'batch_size': 256, 'num_knots': 3, 'num_epochs': 12}. Best is trial 13 with value: 1.0230244392652823.
[I 2023-05-16 20:15:25,054] Trial 14 finished with value: 1.1263353581741014 and parameters: {'embedding_dim': 5, 'step_size': 0.22060040200689754, 'batch_size': 256, 'num_knots': 12, 'num_epochs': 12}. Best is trial 13 with value: 1.0230244392652823.
[I 2023-05-16 20:22:02,080] Trial 15 finished with value: 1.0999773131545263 and parameters: {'embedding_dim': 6, 'step_size': 0.18312582558712856, 'batch_size': 256, 'num_knots': 13, 'num_epochs': 13}. Best is trial 13 with value: 1.0230244392652823.
[I 2023-05-16 20:28:42,275] Trial 16 finished with value: 1.0410950936336398 and parameters: {'embedding_dim': 10, 'step_size': 0.1475792084364477, 'batch_size': 247, 'num_knots': 12, 'num_epochs': 13}. Best is trial 13 with value: 1.0230244392652823.
[I 2023-05-16 20:30:11,072] Trial 17 pruned. 
[I 2023-05-16 20:42:32,391] Trial 18 finished with value: 0.9837241670350367 and parameters: {'embedding_dim': 10, 'step_size': 0.11027358472172152, 'batch_size': 101, 'num_knots': 8, 'num_epochs': 13}. Best is trial 18 with value: 0.9837241670350367.
[I 2023-05-16 20:52:31,862] Trial 19 finished with value: 1.0135361538325767 and parameters: {'embedding_dim': 9, 'step_size': 0.09653067289462239, 'batch_size': 106, 'num_knots': 8, 'num_epochs': 11}. Best is trial 18 with value: 0.9837241670350367.
[I 2023-05-16 21:02:01,178] Trial 20 finished with value: 1.0474427804363677 and parameters: {'embedding_dim': 9, 'step_size': 0.08886733479770562, 'batch_size': 100, 'num_knots': 9, 'num_epochs': 10}. Best is trial 18 with value: 0.9837241670350367.
[I 2023-05-16 21:03:17,069] Trial 21 pruned. 
[I 2023-05-16 21:12:46,324] Trial 22 finished with value: 0.9591751996469187 and parameters: {'embedding_dim': 8, 'step_size': 0.2688719862751585, 'batch_size': 77, 'num_knots': 16, 'num_epochs': 11}. Best is trial 22 with value: 0.9591751996469187.
[I 2023-05-16 21:21:46,922] Trial 23 pruned. 
[I 2023-05-16 21:32:40,408] Trial 24 finished with value: 1.013309950705173 and parameters: {'embedding_dim': 10, 'step_size': 0.1381692732684226, 'batch_size': 122, 'num_knots': 20, 'num_epochs': 14}. Best is trial 22 with value: 0.9591751996469187.
[I 2023-05-16 21:44:32,521] Trial 25 finished with value: 0.9766103503901942 and parameters: {'embedding_dim': 10, 'step_size': 0.30666487353085764, 'batch_size': 120, 'num_knots': 22, 'num_epochs': 14}. Best is trial 22 with value: 0.9591751996469187.
[I 2023-05-16 21:57:07,523] Trial 26 finished with value: 0.9631696687473242 and parameters: {'embedding_dim': 8, 'step_size': 0.30221758573737095, 'batch_size': 74, 'num_knots': 26, 'num_epochs': 14}. Best is trial 22 with value: 0.9591751996469187.
[I 2023-05-16 22:08:48,665] Trial 27 pruned. 
[I 2023-05-16 22:22:53,824] Trial 28 pruned. 
[I 2023-05-16 22:27:42,916] Trial 29 pruned. 
[I 2023-05-16 22:33:49,484] Trial 30 pruned. 
[I 2023-05-16 22:44:19,069] Trial 31 finished with value: 0.9559326717458044 and parameters: {'embedding_dim': 10, 'step_size': 0.1845435818497936, 'batch_size': 83, 'num_knots': 15, 'num_epochs': 13}. Best is trial 31 with value: 0.9559326717458044.
[I 2023-05-16 22:45:30,091] Trial 32 pruned. 
[I 2023-05-16 22:59:50,871] Trial 33 finished with value: 0.9604147753141788 and parameters: {'embedding_dim': 10, 'step_size': 0.288261536780601, 'batch_size': 57, 'num_knots': 22, 'num_epochs': 13}. Best is trial 31 with value: 0.9559326717458044.
[I 2023-05-16 23:15:48,223] Trial 34 finished with value: 0.9742608985868826 and parameters: {'embedding_dim': 8, 'step_size': 0.17867376960796208, 'batch_size': 59, 'num_knots': 26, 'num_epochs': 13}. Best is trial 31 with value: 0.9559326717458044.
[I 2023-05-16 23:37:28,738] Trial 35 finished with value: 0.9171861274877934 and parameters: {'embedding_dim': 9, 'step_size': 0.2645542020430195, 'batch_size': 32, 'num_knots': 19, 'num_epochs': 10}. Best is trial 35 with value: 0.9171861274877934.
[I 2023-05-16 23:55:05,064] Trial 36 finished with value: 1.0132628334036484 and parameters: {'embedding_dim': 9, 'step_size': 0.40545523268469424, 'batch_size': 35, 'num_knots': 20, 'num_epochs': 9}. Best is trial 35 with value: 0.9171861274877934.
[I 2023-05-17 00:05:42,393] Trial 37 finished with value: 0.9345916139732143 and parameters: {'embedding_dim': 10, 'step_size': 0.25690624184860333, 'batch_size': 59, 'num_knots': 14, 'num_epochs': 10}. Best is trial 35 with value: 0.9171861274877934.
[I 2023-05-17 00:20:05,720] Trial 38 finished with value: 0.9352145948828553 and parameters: {'embedding_dim': 9, 'step_size': 0.22319832065362558, 'batch_size': 49, 'num_knots': 14, 'num_epochs': 10}. Best is trial 35 with value: 0.9171861274877934.
[I 2023-05-17 00:26:44,288] Trial 39 pruned. 
[I 2023-05-17 00:41:30,166] Trial 40 finished with value: 0.9663647234913916 and parameters: {'embedding_dim': 9, 'step_size': 0.15557203352959853, 'batch_size': 48, 'num_knots': 18, 'num_epochs': 10}. Best is trial 35 with value: 0.9171861274877934.
[I 2023-05-17 00:49:39,157] Trial 41 finished with value: 0.9422524159401244 and parameters: {'embedding_dim': 10, 'step_size': 0.24528852921151278, 'batch_size': 63, 'num_knots': 10, 'num_epochs': 8}. Best is trial 35 with value: 0.9171861274877934.
[I 2023-05-17 00:58:35,285] Trial 42 finished with value: 0.9372499450353899 and parameters: {'embedding_dim': 10, 'step_size': 0.2395050154883088, 'batch_size': 57, 'num_knots': 10, 'num_epochs': 8}. Best is trial 35 with value: 0.9171861274877934.
[I 2023-05-17 01:06:42,836] Trial 43 finished with value: 0.9408245901462334 and parameters: {'embedding_dim': 10, 'step_size': 0.24135658451497335, 'batch_size': 64, 'num_knots': 10, 'num_epochs': 8}. Best is trial 35 with value: 0.9171861274877934.
[I 2023-05-17 01:18:11,857] Trial 44 finished with value: 0.9583412080247737 and parameters: {'embedding_dim': 9, 'step_size': 0.39856936883703, 'batch_size': 42, 'num_knots': 10, 'num_epochs': 8}. Best is trial 35 with value: 0.9171861274877934.
[I 2023-05-17 01:25:08,180] Trial 45 pruned. 
[I 2023-05-17 01:30:13,286] Trial 46 finished with value: 0.9433470225660775 and parameters: {'embedding_dim': 9, 'step_size': 0.3592001787790354, 'batch_size': 63, 'num_knots': 6, 'num_epochs': 6}. Best is trial 35 with value: 0.9171861274877934.
[I 2023-05-17 01:42:03,625] Trial 47 finished with value: 0.931486584866254 and parameters: {'embedding_dim': 10, 'step_size': 0.225118064987353, 'batch_size': 43, 'num_knots': 14, 'num_epochs': 9}. Best is trial 35 with value: 0.9171861274877934.
[I 2023-05-17 01:54:11,688] Trial 48 finished with value: 0.9828667060204228 and parameters: {'embedding_dim': 10, 'step_size': 0.45874685485020067, 'batch_size': 42, 'num_knots': 14, 'num_epochs': 9}. Best is trial 35 with value: 0.9171861274877934.
[I 2023-05-17 01:58:13,715] Trial 49 pruned. 
[I 2023-05-17 02:09:46,741] Trial 50 finished with value: 0.9406328625524666 and parameters: {'embedding_dim': 9, 'step_size': 0.20765904624110154, 'batch_size': 50, 'num_knots': 13, 'num_epochs': 10}. Best is trial 35 with value: 0.9171861274877934.
[I 2023-05-17 02:21:07,688] Trial 51 finished with value: 0.9405742149964395 and parameters: {'embedding_dim': 9, 'step_size': 0.1969866703171254, 'batch_size': 51, 'num_knots': 13, 'num_epochs': 10}. Best is trial 35 with value: 0.9171861274877934.
[I 2023-05-17 02:36:19,135] Trial 52 finished with value: 0.8842285075359627 and parameters: {'embedding_dim': 10, 'step_size': 0.2645915336156327, 'batch_size': 33, 'num_knots': 5, 'num_epochs': 9}. Best is trial 52 with value: 0.8842285075359627.
[I 2023-05-17 02:50:02,113] Trial 53 finished with value: 0.8876547893343609 and parameters: {'embedding_dim': 10, 'step_size': 0.2727717956425, 'batch_size': 36, 'num_knots': 5, 'num_epochs': 9}. Best is trial 52 with value: 0.8842285075359627.
[I 2023-05-17 03:04:02,753] Trial 54 finished with value: 0.8829253470615361 and parameters: {'embedding_dim': 10, 'step_size': 0.2762817857390965, 'batch_size': 35, 'num_knots': 4, 'num_epochs': 9}. Best is trial 54 with value: 0.8829253470615361.
[I 2023-05-17 03:19:30,440] Trial 55 finished with value: 0.8949441081981341 and parameters: {'embedding_dim': 10, 'step_size': 0.2824114142172617, 'batch_size': 32, 'num_knots': 5, 'num_epochs': 9}. Best is trial 54 with value: 0.8829253470615361.
[I 2023-05-17 03:34:35,882] Trial 56 finished with value: 0.8915999914951418 and parameters: {'embedding_dim': 10, 'step_size': 0.40545429992131593, 'batch_size': 33, 'num_knots': 3, 'num_epochs': 9}. Best is trial 54 with value: 0.8829253470615361.
[I 2023-05-17 03:45:22,948] Trial 57 finished with value: 0.9174837819882632 and parameters: {'embedding_dim': 10, 'step_size': 0.42181265669918006, 'batch_size': 36, 'num_knots': 5, 'num_epochs': 7}. Best is trial 54 with value: 0.8829253470615361.
[I 2023-05-17 04:01:08,351] Trial 58 finished with value: 0.8812943291843862 and parameters: {'embedding_dim': 10, 'step_size': 0.36073913832745863, 'batch_size': 32, 'num_knots': 3, 'num_epochs': 9}. Best is trial 58 with value: 0.8812943291843862.
[I 2023-05-17 04:02:48,388] Trial 59 pruned. 
[I 2023-05-17 04:03:31,415] Trial 60 pruned. 
[I 2023-05-17 04:18:57,676] Trial 61 finished with value: 0.8945438434097501 and parameters: {'embedding_dim': 9, 'step_size': 0.2875851804909289, 'batch_size': 33, 'num_knots': 6, 'num_epochs': 9}. Best is trial 58 with value: 0.8812943291843862.
[I 2023-05-17 04:34:24,387] Trial 62 finished with value: 0.9080907608582166 and parameters: {'embedding_dim': 10, 'step_size': 0.2859064435511358, 'batch_size': 33, 'num_knots': 6, 'num_epochs': 9}. Best is trial 58 with value: 0.8812943291843862.
[I 2023-05-17 04:46:47,936] Trial 63 finished with value: 0.923213213745785 and parameters: {'embedding_dim': 10, 'step_size': 0.3574927238399868, 'batch_size': 41, 'num_knots': 8, 'num_epochs': 9}. Best is trial 58 with value: 0.8812943291843862.
[I 2023-05-17 05:02:15,375] Trial 64 finished with value: 0.9303926950883815 and parameters: {'embedding_dim': 9, 'step_size': 0.4984824913019317, 'batch_size': 32, 'num_knots': 5, 'num_epochs': 9}. Best is trial 58 with value: 0.8812943291843862.
[I 2023-05-17 05:12:20,767] Trial 65 finished with value: 0.8836246232120878 and parameters: {'embedding_dim': 10, 'step_size': 0.2918598742790804, 'batch_size': 45, 'num_knots': 3, 'num_epochs': 8}. Best is trial 58 with value: 0.8812943291843862.
[I 2023-05-17 05:13:23,409] Trial 66 pruned. 
[I 2023-05-17 05:23:17,634] Trial 67 finished with value: 0.9203880084785563 and parameters: {'embedding_dim': 10, 'step_size': 0.2978806762773378, 'batch_size': 46, 'num_knots': 7, 'num_epochs': 8}. Best is trial 58 with value: 0.8812943291843862.
[I 2023-05-17 05:24:07,220] Trial 68 pruned. 
[I 2023-05-17 05:25:26,541] Trial 69 pruned. 
[I 2023-05-17 05:38:54,831] Trial 70 finished with value: 0.8885545925677477 and parameters: {'embedding_dim': 6, 'step_size': 0.3377290795161348, 'batch_size': 40, 'num_knots': 4, 'num_epochs': 8}. Best is trial 58 with value: 0.8812943291843862.
[I 2023-05-17 05:53:02,508] Trial 71 finished with value: 0.898234601537262 and parameters: {'embedding_dim': 6, 'step_size': 0.32131412729525716, 'batch_size': 39, 'num_knots': 4, 'num_epochs': 8}. Best is trial 58 with value: 0.8812943291843862.
[I 2023-05-17 05:55:20,996] Trial 72 pruned. 
[I 2023-05-17 05:57:42,907] Trial 73 pruned. 
[I 2023-05-17 06:09:46,746] Trial 74 finished with value: 0.9336780206801858 and parameters: {'embedding_dim': 10, 'step_size': 0.3363325348006269, 'batch_size': 38, 'num_knots': 8, 'num_epochs': 7}. Best is trial 58 with value: 0.8812943291843862.
[I 2023-05-17 06:10:46,772] Trial 75 pruned. 
[I 2023-05-17 06:23:23,832] Trial 76 finished with value: 0.9035483242637248 and parameters: {'embedding_dim': 10, 'step_size': 0.3023068642066013, 'batch_size': 46, 'num_knots': 6, 'num_epochs': 8}. Best is trial 58 with value: 0.8812943291843862.
[I 2023-05-17 06:34:00,400] Trial 77 finished with value: 0.9356443477813984 and parameters: {'embedding_dim': 8, 'step_size': 0.3707428489721444, 'batch_size': 53, 'num_knots': 8, 'num_epochs': 9}. Best is trial 58 with value: 0.8812943291843862.
[I 2023-05-17 06:40:20,237] Trial 78 finished with value: 0.8893062800372801 and parameters: {'embedding_dim': 9, 'step_size': 0.27560158762743114, 'batch_size': 66, 'num_knots': 3, 'num_epochs': 10}. Best is trial 58 with value: 0.8812943291843862.
[I 2023-05-17 06:48:23,467] Trial 79 finished with value: 0.8840000301227305 and parameters: {'embedding_dim': 10, 'step_size': 0.2621309491504174, 'batch_size': 68, 'num_knots': 3, 'num_epochs': 11}. Best is trial 58 with value: 0.8812943291843862.
[I 2023-05-17 06:49:38,654] Trial 80 pruned. 
[I 2023-05-17 06:50:44,419] Trial 81 pruned. 
[I 2023-05-17 07:01:59,621] Trial 82 finished with value: 0.8794329950964015 and parameters: {'embedding_dim': 10, 'step_size': 0.24654623010907453, 'batch_size': 68, 'num_knots': 3, 'num_epochs': 12}. Best is trial 82 with value: 0.8794329950964015.
[I 2023-05-17 07:03:01,386] Trial 83 pruned. 
[I 2023-05-17 07:04:27,287] Trial 84 pruned. 
[I 2023-05-17 07:05:37,113] Trial 85 pruned. 
[I 2023-05-17 07:06:37,147] Trial 86 pruned. 
[I 2023-05-17 07:07:52,641] Trial 87 pruned. 
[I 2023-05-17 07:10:41,882] Trial 88 pruned. 
[I 2023-05-17 07:12:02,122] Trial 89 pruned. 
[I 2023-05-17 07:13:00,456] Trial 90 pruned. 
[I 2023-05-17 07:25:04,550] Trial 91 finished with value: 0.8899198718915379 and parameters: {'embedding_dim': 10, 'step_size': 0.38574848982787274, 'batch_size': 37, 'num_knots': 3, 'num_epochs': 8}. Best is trial 82 with value: 0.8794329950964015.
[I 2023-05-17 07:36:52,813] Trial 92 finished with value: 0.8794658998359767 and parameters: {'embedding_dim': 10, 'step_size': 0.24069912203863186, 'batch_size': 38, 'num_knots': 3, 'num_epochs': 8}. Best is trial 82 with value: 0.8794329950964015.
[I 2023-05-17 07:44:42,769] Trial 93 finished with value: 0.9155397785613877 and parameters: {'embedding_dim': 10, 'step_size': 0.2388690985342639, 'batch_size': 53, 'num_knots': 6, 'num_epochs': 7}. Best is trial 82 with value: 0.8794329950964015.
[I 2023-05-17 07:45:35,447] Trial 94 pruned. 
[I 2023-05-17 07:58:02,324] Trial 95 finished with value: 0.9002698705631274 and parameters: {'embedding_dim': 9, 'step_size': 0.22749984739763673, 'batch_size': 46, 'num_knots': 5, 'num_epochs': 10}. Best is trial 82 with value: 0.8794329950964015.
[I 2023-05-17 08:00:11,700] Trial 96 pruned. 
[I 2023-05-17 08:08:32,642] Trial 97 finished with value: 0.8877704114380222 and parameters: {'embedding_dim': 9, 'step_size': 0.2560666131932691, 'batch_size': 71, 'num_knots': 3, 'num_epochs': 11}. Best is trial 82 with value: 0.8794329950964015.
[I 2023-05-17 08:09:38,192] Trial 98 pruned. 
[I 2023-05-17 08:14:32,944] Trial 99 pruned. 
In [16]:
trial = study.best_trial

print('Test RMSE: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))
Test RMSE: 0.8794329950964015
Best hyperparameters: {'embedding_dim': 10, 'step_size': 0.24654623010907453, 'batch_size': 68, 'num_knots': 3, 'num_epochs': 12}
In [17]:
study.best_params
Out[17]:
{'embedding_dim': 10,
 'step_size': 0.24654623010907453,
 'batch_size': 68,
 'num_knots': 3,
 'num_epochs': 12}
In [18]:
train_spline_fm(**study.best_params)  # returns the final test MSE (sqrt of this is ~0.8825)
Out[18]:
0.7787345051765442
In [29]:
spline_losses = []
for i in trange(20):
    loss = train_spline_fm(**study.best_params)  # test MSE
    spline_losses.append(math.sqrt(loss))        # RMSE in standardized target units
100%|██████████| 20/20 [2:39:59<00:00, 479.95s/it]  
In [30]:
spline_losses
Out[30]:
[0.881695640108332,
 0.881986518583829,
 0.8812350129821337,
 0.8835068731661306,
 0.8756028550892196,
 0.8795998443241684,
 0.8812374141146075,
 0.8793456949910461,
 0.879686949816137,
 0.8792196435620432,
 0.8800452079323017,
 0.8800887905106133,
 0.8787611778058435,
 0.8798512104892138,
 0.8808272328401603,
 0.8801772358534317,
 0.8815416286395354,
 0.8799189857558083,
 0.8787488669310817,
 0.8843083494702224]
In [33]:
np.mean(spline_losses), np.std(spline_losses), 100 * np.std(spline_losses) / np.mean(spline_losses)
Out[33]:
(0.880369256648293, 0.0018009996668723587, 0.2045732121234053)
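These RMSEs are in standardized units because the Year target went through StandardScaler. Multiplying by the scaler's stored standard deviation converts them back to years (a quick check, not part of the recorded run):

# StandardScaler keeps the training-target standard deviation in scale_.
rmse_years = float(target_scaler.scale_[0]) * np.mean(spline_losses)
print('mean test RMSE = {:.2f} years'.format(rmse_years))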
In [19]:
def train_bin_fm(embedding_dim: int, step_size: float, batch_size: int,
                 num_bins: int, bin_strategy: str, num_epochs: int,
                 callback: Optional[Callable[[int, float], None]] = None):
    num_fields = tr_feats.shape[1]
    num_embeddings = num_fields * num_bins
    index_offsets = np.arange(0, num_fields) * num_bins

    # subsample=None keeps the pre-1.3 scikit-learn behaviour (fit on all rows)
    # and silences the FutureWarning about the changing default.
    discretizer = KBinsDiscretizer(num_bins, encode='ordinal', strategy=bin_strategy,
                                   subsample=None, random_state=42)
    discretizer.fit(tr_feats)

    tr_indices = discretizer.transform(tr_feats)
    tr_indices += np.tile(index_offsets, (tr_indices.shape[0], 1))
    tr_weights = np.ones_like(tr_indices)
    tr_fields = np.tile(np.arange(0, num_fields), (tr_indices.shape[0], 1))
    tr_offsets = tr_fields.copy()

    te_indices = discretizer.transform(te_feats)
    te_indices += np.tile(index_offsets, (te_indices.shape[0], 1))
    te_weights = np.ones_like(te_indices)
    te_fields = np.tile(np.arange(0, num_fields), (te_indices.shape[0], 1))
    te_offsets = te_fields.copy()

    train_ds = TensorDataset(
        torch.tensor(tr_indices, dtype=torch.int64),
        torch.tensor(tr_weights, dtype=torch.float32),
        torch.tensor(tr_offsets, dtype=torch.int64),
        torch.tensor(tr_fields, dtype=torch.int64),
        torch.tensor(tr_target, dtype=torch.float32))

    test_ds = TensorDataset(
        torch.tensor(te_indices, dtype=torch.int64),
        torch.tensor(te_weights, dtype=torch.float32),
        torch.tensor(te_offsets, dtype=torch.int64),
        torch.tensor(te_fields, dtype=torch.int64),
        torch.tensor(te_target, dtype=torch.float32))

    trainer = FMTrainer(embedding_dim, step_size, batch_size, num_epochs, callback)
    return trainer.train(num_fields, num_embeddings, train_ds, test_ds, torch.nn.MSELoss(), device)
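The index arithmetic in train_bin_fm gives every (field, bin) pair its own embedding row: field j's ordinal bin ids are shifted by j * num_bins. A toy check of the offsetting, with hypothetical values for 2 fields and 3 bins:

demo_bins = np.array([[0., 2.],
                      [1., 0.]])   # ordinal bin ids for 2 rows x 2 fields
demo_offsets = np.arange(2) * 3    # [0, 3]: field 1 starts at embedding row 3
print(demo_bins + demo_offsets)    # [[0. 5.], [1. 3.]]: unique row per (field, bin)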
In [20]:
def test_bins_objective(trial: optuna.Trial):
    embedding_dim = trial.suggest_int('embedding_dim', 1, 10)
    step_size = trial.suggest_float('step_size', 1e-2, 0.5, log=True)
    batch_size = trial.suggest_int('batch_size', 32, 256)
    num_bins = trial.suggest_int('num_bins', 2, 100)
    bin_strategy = trial.suggest_categorical('bin_strategy', ['uniform', 'quantile'])
    num_epochs = trial.suggest_int('num_epochs', 5, 15)

    def callback(epoch: int, loss: float):
        trial.report(math.sqrt(loss), epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return math.sqrt(train_bin_fm(embedding_dim, step_size, batch_size, num_bins, bin_strategy, num_epochs,
                                  callback=callback))
In [21]:
study_bins = optuna.create_study(study_name='bins',
                                 direction='minimize',
                                 sampler=optuna.samplers.TPESampler(seed=42))
study_bins.optimize(test_bins_objective, n_trials=100)
[I 2023-05-17 08:23:55,739] A new study created in memory with name: bins
[I 2023-05-17 08:25:38,113] Trial 0 finished with value: 1.5271531398304155 and parameters: {'embedding_dim': 4, 'step_size': 0.4123206532618726, 'batch_size': 196, 'num_bins': 61, 'bin_strategy': 'uniform', 'num_epochs': 5}. Best is trial 0 with value: 1.5271531398304155.
[I 2023-05-17 08:28:04,884] Trial 1 finished with value: 0.9448509350530763 and parameters: {'embedding_dim': 9, 'step_size': 0.10502105436744279, 'batch_size': 191, 'num_bins': 4, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 1 with value: 0.9448509350530763.
[I 2023-05-17 08:34:45,404] Trial 2 finished with value: 5.603986016331611 and parameters: {'embedding_dim': 2, 'step_size': 0.020492680115417352, 'batch_size': 100, 'num_bins': 53, 'bin_strategy': 'uniform', 'num_epochs': 11}. Best is trial 1 with value: 0.9448509350530763.
[I 2023-05-17 08:40:16,860] Trial 3 finished with value: 3.487825336116525 and parameters: {'embedding_dim': 2, 'step_size': 0.03135775732257745, 'batch_size': 114, 'num_bins': 47, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 1 with value: 0.9448509350530763.
[I 2023-05-17 08:45:51,720] Trial 4 finished with value: 5.078675976720081 and parameters: {'embedding_dim': 6, 'step_size': 0.011992724522955167, 'batch_size': 168, 'num_bins': 18, 'bin_strategy': 'quantile', 'num_epochs': 15}. Best is trial 1 with value: 0.9448509350530763.
[I 2023-05-17 08:56:25,390] Trial 5 finished with value: 1.5473596603529345 and parameters: {'embedding_dim': 9, 'step_size': 0.032925293631105246, 'batch_size': 53, 'num_bins': 69, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 1 with value: 0.9448509350530763.
[I 2023-05-17 09:03:33,049] Trial 6 finished with value: 1.2903412458488763 and parameters: {'embedding_dim': 1, 'step_size': 0.35067764992972184, 'batch_size': 90, 'num_bins': 67, 'bin_strategy': 'quantile', 'num_epochs': 11}. Best is trial 1 with value: 0.9448509350530763.
[I 2023-05-17 09:08:53,444] Trial 7 finished with value: 1.204044238579979 and parameters: {'embedding_dim': 2, 'step_size': 0.4439102767051397, 'batch_size': 206, 'num_bins': 95, 'bin_strategy': 'uniform', 'num_epochs': 15}. Best is trial 1 with value: 0.9448509350530763.
[I 2023-05-17 09:10:11,410] Trial 8 pruned. 
[I 2023-05-17 09:10:36,489] Trial 9 pruned. 
[I 2023-05-17 09:12:05,260] Trial 10 finished with value: 1.3519945281047807 and parameters: {'embedding_dim': 10, 'step_size': 0.12983523609376665, 'batch_size': 250, 'num_bins': 3, 'bin_strategy': 'quantile', 'num_epochs': 5}. Best is trial 1 with value: 0.9448509350530763.
[I 2023-05-17 09:14:21,947] Trial 11 finished with value: 1.5510122285726062 and parameters: {'embedding_dim': 8, 'step_size': 0.1376872218807011, 'batch_size': 216, 'num_bins': 100, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 1 with value: 0.9448509350530763.
[W 2023-05-17 09:14:22,281] Trial 12 failed with parameters: {'embedding_dim': 7, 'step_size': 0.21367114396906817, 'batch_size': 202, 'num_bins': 95, 'bin_strategy': 'uniform', 'num_epochs': 8} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_258955/2787357413.py", line 14, in test_bins_objective
    return math.sqrt(train_bin_fm(embedding_dim, step_size, batch_size, num_bins, bin_strategy, num_epochs,
  File "/tmp/ipykernel_258955/1864156579.py", line 11, in train_bin_fm
    tr_indices = discretizer.transform(tr_feats)
  File "/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py", line 375, in transform
    Xt[:, jj] = np.searchsorted(bin_edges[jj][1:-1], Xt[:, jj], side="right")
  File "<__array_function__ internals>", line 180, in searchsorted
  File "/usr/local/lib64/python3.9/site-packages/numpy/core/fromnumeric.py", line 1387, in searchsorted
    return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
  File "/usr/local/lib64/python3.9/site-packages/numpy/core/fromnumeric.py", line 57, in _wrapfunc
    return bound(*args, **kwds)
KeyboardInterrupt
[W 2023-05-17 09:14:22,283] Trial 12 failed with value None.
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Input In [21], in <cell line: 4>()
      1 study_bins = optuna.create_study(study_name='bins',
      2                                  direction='minimize',
      3                                  sampler=optuna.samplers.TPESampler(seed=42))
----> 4 study_bins.optimize(test_bins_objective, n_trials=100)

File /usr/local/lib/python3.9/site-packages/optuna/study/study.py:425, in Study.optimize(self, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
    321 def optimize(
    322     self,
    323     func: ObjectiveFuncType,
   (...)
    330     show_progress_bar: bool = False,
    331 ) -> None:
    332     """Optimize an objective function.
    333 
    334     Optimization is done by choosing a suitable set of hyperparameter values from a given
   (...)
    422             If nested invocation of this method occurs.
    423     """
--> 425     _optimize(
    426         study=self,
    427         func=func,
    428         n_trials=n_trials,
    429         timeout=timeout,
    430         n_jobs=n_jobs,
    431         catch=tuple(catch) if isinstance(catch, Iterable) else (catch,),
    432         callbacks=callbacks,
    433         gc_after_trial=gc_after_trial,
    434         show_progress_bar=show_progress_bar,
    435     )

File /usr/local/lib/python3.9/site-packages/optuna/study/_optimize.py:66, in _optimize(study, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
     64 try:
     65     if n_jobs == 1:
---> 66         _optimize_sequential(
     67             study,
     68             func,
     69             n_trials,
     70             timeout,
     71             catch,
     72             callbacks,
     73             gc_after_trial,
     74             reseed_sampler_rng=False,
     75             time_start=None,
     76             progress_bar=progress_bar,
     77         )
     78     else:
     79         if n_jobs == -1:

File /usr/local/lib/python3.9/site-packages/optuna/study/_optimize.py:163, in _optimize_sequential(study, func, n_trials, timeout, catch, callbacks, gc_after_trial, reseed_sampler_rng, time_start, progress_bar)
    160         break
    162 try:
--> 163     frozen_trial = _run_trial(study, func, catch)
    164 finally:
    165     # The following line mitigates memory problems that can be occurred in some
    166     # environments (e.g., services that use computing containers such as GitHub Actions).
    167     # Please refer to the following PR for further details:
    168     # https://github.com/optuna/optuna/pull/325.
    169     if gc_after_trial:

File /usr/local/lib/python3.9/site-packages/optuna/study/_optimize.py:251, in _run_trial(study, func, catch)
    244         assert False, "Should not reach."
    246 if (
    247     frozen_trial.state == TrialState.FAIL
    248     and func_err is not None
    249     and not isinstance(func_err, catch)
    250 ):
--> 251     raise func_err
    252 return frozen_trial

File /usr/local/lib/python3.9/site-packages/optuna/study/_optimize.py:200, in _run_trial(study, func, catch)
    198 with get_heartbeat_thread(trial._trial_id, study._storage):
    199     try:
--> 200         value_or_values = func(trial)
    201     except exceptions.TrialPruned as e:
    202         # TODO(mamu): Handle multi-objective cases.
    203         state = TrialState.PRUNED

Input In [20], in test_bins_objective(trial)
     11     if trial.should_prune():
     12         raise optuna.TrialPruned()
---> 14 return math.sqrt(train_bin_fm(embedding_dim, step_size, batch_size, num_bins, bin_strategy, num_epochs,
     15                               callback=callback))

Input In [19], in train_bin_fm(embedding_dim, step_size, batch_size, num_bins, bin_strategy, num_epochs, callback)
      8 discretizer = KBinsDiscretizer(num_bins, encode='ordinal', strategy=bin_strategy, random_state=42)
      9 discretizer.fit(tr_feats)
---> 11 tr_indices = discretizer.transform(tr_feats)
     12 tr_indices += np.tile(index_offsets, (tr_indices.shape[0], 1))
     13 tr_weights = np.ones_like(tr_indices)

File /usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:375, in KBinsDiscretizer.transform(self, X)
    373 bin_edges = self.bin_edges_
    374 for jj in range(Xt.shape[1]):
--> 375     Xt[:, jj] = np.searchsorted(bin_edges[jj][1:-1], Xt[:, jj], side="right")
    377 if self.encode == "ordinal":
    378     return Xt

File <__array_function__ internals>:180, in searchsorted(*args, **kwargs)

File /usr/local/lib64/python3.9/site-packages/numpy/core/fromnumeric.py:1387, in searchsorted(a, v, side, sorter)
   1319 @array_function_dispatch(_searchsorted_dispatcher)
   1320 def searchsorted(a, v, side='left', sorter=None):
   1321     """
   1322     Find indices where elements should be inserted to maintain order.
   1323 
   (...)
   1385 
   1386     """
-> 1387     return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)

File /usr/local/lib64/python3.9/site-packages/numpy/core/fromnumeric.py:57, in _wrapfunc(obj, method, *args, **kwds)
     54     return _wrapit(obj, method, *args, **kwds)
     56 try:
---> 57     return bound(*args, **kwds)
     58 except TypeError:
     59     # A TypeError occurs if the object does have such a method in its
     60     # class, but its signature is not identical to that of NumPy's. This
   (...)
     64     # Call _wrapit from within the except clause to ensure a potential
     65     # exception has a traceback chain.
     66     return _wrapit(obj, method, *args, **kwds)

KeyboardInterrupt: 
In [ ]:
study_bins.best_params
In [ ]:
trial = study_bins.best_trial

print('Test RMSE: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))
In [ ]:
train_bin_fm(**study_bins.best_params)
In [ ]:
bin_losses = []
for i in trange(20):
    loss = train_bin_fm(**study_bins.best_params)  # test MSE
    bin_losses.append(math.sqrt(loss))             # RMSE in standardized target units
In [ ]:
bin_losses
In [ ]:
np.mean(bin_losses), np.std(bin_losses)