import sys
sys.path.insert(0, "../utils")
import sklearn.datasets as skds
from sklearn.preprocessing import QuantileTransformer, KBinsDiscretizer, OrdinalEncoder, LabelEncoder, StandardScaler
import numpy as np
import pandas as pd
from transformation import BSplineTransformer, spline_transform_dataset
from trainers import FFMTrainer, FMTrainer
import math
import optuna
import optuna.samplers
from typing import Callable
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset
from tqdm import trange
# Use the first CUDA GPU when PyTorch can see one, otherwise fall back to the CPU.
# The result is printed so the notebook records which device the run used.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
cuda:0
# Seed NumPy and PyTorch so repeated runs of the notebook draw the same random numbers.
np.random.seed(42)
torch.manual_seed(42)

# Load the training CSV. The first line of the file is skipped and explicit column names are
# supplied instead: the target 'Year', then TA01..TA12 and TC01..TC78 (91 columns in total).
# dtype is keyed by integer position: key 0 is read as int, keys 1..90 as float.
# "?" entries are treated as missing values.
# TODO: only 3000 lines are loaded in the data
# NOTE(review): the recorded `raw_df.shape` output reports 463715 rows, so the TODO above
# may be stale - confirm before relying on it.
raw_df = pd.read_csv(
    "../data/Ye_millionsongdataset/Training_set_songs.csv",
    names=['Year',
           *(f'TA{i:02d}' for i in range(1, 13)),
           *(f'TC{i:02d}' for i in range(1, 79))],
    dtype={0: int, **dict.fromkeys(range(1, 91), float)},
    na_values="?",
    skiprows=1,
)

# Peek at six randomly chosen rows.
raw_df.sample(6)
Year | TA01 | TA02 | TA03 | TA04 | TA05 | TA06 | TA07 | TA08 | TA09 | ... | TC69 | TC70 | TC71 | TC72 | TC73 | TC74 | TC75 | TC76 | TC77 | TC78 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
332595 | 2004 | 51.19589 | 18.54077 | 12.55200 | 3.25000 | -7.79542 | -19.63461 | 0.27662 | -1.03994 | -1.57231 | ... | -16.43458 | -37.05054 | -100.93186 | 49.26529 | -1.47467 | -39.25996 | -22.44389 | -16.37939 | -49.67664 | 3.87491 |
230573 | 1989 | 51.44573 | 53.42621 | 35.83483 | -16.72867 | -48.13185 | -13.04248 | -45.74081 | -6.18791 | 16.60100 | ... | 9.94220 | -71.82280 | 59.40250 | 45.14201 | -2.72343 | 35.62292 | 37.99939 | 7.04862 | 21.08678 | 2.07779 |
364530 | 1987 | 44.85215 | 33.51052 | 27.44631 | 24.99783 | -15.02508 | 12.29223 | 10.57365 | 7.06412 | -1.29649 | ... | 5.05131 | 56.78609 | 136.98499 | -30.38374 | 24.51034 | 62.77834 | 44.80304 | 30.34866 | 105.17991 | 2.58183 |
82857 | 2002 | 50.25653 | 59.83236 | 37.80210 | -0.57762 | -1.64064 | -11.77884 | -2.25574 | -4.17007 | 4.05922 | ... | -4.12555 | 21.34620 | 29.72172 | 56.71419 | 2.61590 | 56.47152 | -26.05716 | -0.77059 | 29.40943 | -0.02311 |
108108 | 1971 | 45.48775 | -5.49790 | 9.56187 | 9.36977 | -11.15726 | -3.63341 | 11.00297 | -6.36722 | 5.37455 | ... | -29.83405 | 228.02525 | 53.68190 | -47.38609 | 14.62809 | 124.90797 | -26.61476 | 5.08838 | 295.42035 | 19.74883 |
446568 | 2005 | 49.59492 | 35.40110 | -8.11273 | -13.40502 | 3.18931 | -14.05923 | 3.04436 | -1.43375 | 3.60354 | ... | -0.40689 | -91.80271 | -29.51019 | -6.40710 | 6.99983 | 81.51023 | -83.44656 | 2.81049 | 24.80947 | -10.32877 |
6 rows × 91 columns
# Display the (rows, columns) shape of the loaded frame.
raw_df.shape
(463715, 91)
# Display the column labels of the loaded frame.
raw_df.columns
Index(['Year', 'TA01', 'TA02', 'TA03', 'TA04', 'TA05', 'TA06', 'TA07', 'TA08', 'TA09', 'TA10', 'TA11', 'TA12', 'TC01', 'TC02', 'TC03', 'TC04', 'TC05', 'TC06', 'TC07', 'TC08', 'TC09', 'TC10', 'TC11', 'TC12', 'TC13', 'TC14', 'TC15', 'TC16', 'TC17', 'TC18', 'TC19', 'TC20', 'TC21', 'TC22', 'TC23', 'TC24', 'TC25', 'TC26', 'TC27', 'TC28', 'TC29', 'TC30', 'TC31', 'TC32', 'TC33', 'TC34', 'TC35', 'TC36', 'TC37', 'TC38', 'TC39', 'TC40', 'TC41', 'TC42', 'TC43', 'TC44', 'TC45', 'TC46', 'TC47', 'TC48', 'TC49', 'TC50', 'TC51', 'TC52', 'TC53', 'TC54', 'TC55', 'TC56', 'TC57', 'TC58', 'TC59', 'TC60', 'TC61', 'TC62', 'TC63', 'TC64', 'TC65', 'TC66', 'TC67', 'TC68', 'TC69', 'TC70', 'TC71', 'TC72', 'TC73', 'TC74', 'TC75', 'TC76', 'TC77', 'TC78'], dtype='object')
# Hold out 20% of the rows as a test split; the fixed random_state keeps the split repeatable.
train, test = train_test_split(raw_df, test_size=0.2, random_state=42)

# Separate the 'Year' target from the remaining feature columns in each split.
tr_feats, tr_target = train.drop(columns="Year"), train["Year"].to_numpy()
te_feats, te_target = test.drop(columns="Year"), test["Year"].to_numpy()

# Scale the target with a StandardScaler fitted on the training targets only, then apply the
# same fitted scaler to the test targets. The scaler works on 2-D input, so the 1-D target is
# given a column axis for the call and flattened again afterwards.
target_scaler = StandardScaler()
tr_target = target_scaler.fit_transform(tr_target[:, np.newaxis]).ravel()
te_target = target_scaler.transform(te_target[:, np.newaxis]).ravel()

# Quantile-transform the features to a uniform output distribution. The transformer is fitted
# on the training features (subsample is set to the full training size) and reused for the
# test features.
quant_transform = QuantileTransformer(
    n_quantiles=10000,
    output_distribution='uniform',
    subsample=tr_feats.shape[0],
    random_state=42,
)
X_train_qs = quant_transform.fit_transform(tr_feats)
X_test_qs = quant_transform.transform(te_feats)
def train_spline_fm(embedding_dim: int, step_size: float, batch_size: int, num_knots: int, num_epochs: int,
                    callback: Callable[[int, float], None]=None):
    """Train an FM model on B-spline encoded features and return the trainer's result.

    The quantile-transformed train/test matrices (module-level ``X_train_qs`` and
    ``X_test_qs``) are encoded with ``spline_transform_dataset``, wrapped together with
    the scaled targets in ``TensorDataset`` objects, and handed to ``FMTrainer``.

    Args:
        embedding_dim: Forwarded to ``FMTrainer``.
        step_size: Forwarded to ``FMTrainer``.
        batch_size: Forwarded to ``FMTrainer``.
        num_knots: First argument of ``BSplineTransformer``; the second argument is
            fixed at 3 (presumably the spline degree - TODO confirm).
        num_epochs: Forwarded to ``FMTrainer``.
        callback: Optional callable forwarded to ``FMTrainer``; by its annotation it
            takes ``(int, float)`` - in this file it is used as ``(epoch, loss)``.

    Returns:
        Whatever ``FMTrainer.train`` returns. Callers in this file take ``math.sqrt``
        of it, so it is presumably a scalar MSE loss on the test dataset - TODO confirm.
    """
    def build_dataset(indices, weights, offsets, fields, target):
        # Integer-valued arrays become int64 tensors; weights and targets become float32.
        return TensorDataset(
            torch.tensor(indices, dtype=torch.int64),
            torch.tensor(weights, dtype=torch.float32),
            torch.tensor(offsets, dtype=torch.int64),
            torch.tensor(fields, dtype=torch.int64),
            torch.tensor(target, dtype=torch.float32))

    basis = BSplineTransformer(num_knots, 3)
    train_idx, train_w, train_off, train_fld = spline_transform_dataset(X_train_qs, basis)
    test_idx, test_w, test_off, test_fld = spline_transform_dataset(X_test_qs, basis)

    num_fields = X_train_qs.shape[1]
    # Size the embedding table by the largest index seen in either split.
    largest_index = max(np.max(train_idx), np.max(test_idx))
    num_embeddings = int(largest_index + 1)

    train_ds = build_dataset(train_idx, train_w, train_off, train_fld, tr_target)
    test_ds = build_dataset(test_idx, test_w, test_off, test_fld, te_target)

    trainer = FMTrainer(embedding_dim, step_size, batch_size, num_epochs, callback)
    loss_fn = torch.nn.MSELoss()
    return trainer.train(num_fields, num_embeddings, train_ds, test_ds, loss_fn, device)
def train_spline_objective(trial: optuna.Trial):
    """Optuna objective for the spline model.

    Samples one hyperparameter configuration from ``trial``, trains with
    ``train_spline_fm``, and returns the square root of the value it returns.
    After each callback invocation the square-rooted loss is reported to the trial,
    and ``optuna.TrialPruned`` is raised if the trial should be pruned.
    """
    # The suggest calls are kept in this exact order so a seeded sampler produces the
    # same sequence of configurations.
    hparams = {
        'embedding_dim': trial.suggest_int('embedding_dim', 1, 10),
        'step_size': trial.suggest_float('step_size', 1e-2, 0.5, log=True),
        'batch_size': trial.suggest_int('batch_size', 32, 256),
        'num_knots': trial.suggest_int('num_knots', 3, 48),
        'num_epochs': trial.suggest_int('num_epochs', 5, 15),
    }

    def report_and_maybe_prune(epoch: int, loss: float):
        # Report on the same scale as the objective's return value (square root of the loss).
        trial.report(math.sqrt(loss), epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

    final_loss = train_spline_fm(**hparams, callback=report_and_maybe_prune)
    return math.sqrt(final_loss)
# In-memory study for the spline model. The objective value is minimized, and the TPE
# sampler is seeded so the sequence of suggested configurations is repeatable.
study = optuna.create_study(
    sampler=optuna.samplers.TPESampler(seed=42),
    direction='minimize',
    study_name='splines',
)
# Run 100 trials of the spline objective.
study.optimize(func=train_spline_objective, n_trials=100)
[I 2023-05-16 18:53:23,460] A new study created in memory with name: splines [I 2023-05-16 18:56:43,426] Trial 0 finished with value: 1.2386136255730438 and parameters: {'embedding_dim': 4, 'step_size': 0.4123206532618726, 'batch_size': 196, 'num_knots': 30, 'num_epochs': 6}. Best is trial 0 with value: 1.2386136255730438. [I 2023-05-16 19:03:07,293] Trial 1 finished with value: 9.578139736045964 and parameters: {'embedding_dim': 2, 'step_size': 0.012551115172973842, 'batch_size': 226, 'num_knots': 30, 'num_epochs': 12}. Best is trial 0 with value: 1.2386136255730438. [I 2023-05-16 19:06:49,945] Trial 2 finished with value: 1.4629122193885566 and parameters: {'embedding_dim': 1, 'step_size': 0.44447541666908114, 'batch_size': 219, 'num_knots': 12, 'num_epochs': 7}. Best is trial 0 with value: 1.2386136255730438. [I 2023-05-16 19:13:15,304] Trial 3 finished with value: 5.51720045029323 and parameters: {'embedding_dim': 2, 'step_size': 0.0328774741399112, 'batch_size': 150, 'num_knots': 22, 'num_epochs': 8}. Best is trial 0 with value: 1.2386136255730438. [I 2023-05-16 19:23:13,052] Trial 4 finished with value: 2.7195761291430305 and parameters: {'embedding_dim': 7, 'step_size': 0.017258215396625, 'batch_size': 97, 'num_knots': 19, 'num_epochs': 10}. Best is trial 0 with value: 1.2386136255730438. [I 2023-05-16 19:24:34,830] Trial 5 pruned. [I 2023-05-16 19:26:59,399] Trial 6 pruned. [I 2023-05-16 19:39:46,136] Trial 7 finished with value: 1.3228858834036774 and parameters: {'embedding_dim': 9, 'step_size': 0.032925293631105246, 'batch_size': 53, 'num_knots': 34, 'num_epochs': 9}. Best is trial 0 with value: 1.2386136255730438. [I 2023-05-16 19:52:35,118] Trial 8 finished with value: 1.550295967639816 and parameters: {'embedding_dim': 2, 'step_size': 0.06938901412739397, 'batch_size': 39, 'num_knots': 44, 'num_epochs': 7}. Best is trial 0 with value: 1.2386136255730438. [I 2023-05-16 19:53:51,842] Trial 9 pruned. 
[I 2023-05-16 19:57:07,263] Trial 10 finished with value: 1.1966288679541184 and parameters: {'embedding_dim': 4, 'step_size': 0.38450727047302585, 'batch_size': 188, 'num_knots': 3, 'num_epochs': 5}. Best is trial 10 with value: 1.1966288679541184. [I 2023-05-16 20:00:16,413] Trial 11 finished with value: 1.166368747320352 and parameters: {'embedding_dim': 4, 'step_size': 0.45819653342095856, 'batch_size': 192, 'num_knots': 6, 'num_epochs': 5}. Best is trial 11 with value: 1.166368747320352. [I 2023-05-16 20:03:34,162] Trial 12 finished with value: 1.2247194669600276 and parameters: {'embedding_dim': 4, 'step_size': 0.20992321291982594, 'batch_size': 184, 'num_knots': 4, 'num_epochs': 5}. Best is trial 11 with value: 1.166368747320352. [I 2023-05-16 20:09:21,405] Trial 13 finished with value: 1.0230244392652823 and parameters: {'embedding_dim': 5, 'step_size': 0.25231843076021476, 'batch_size': 256, 'num_knots': 3, 'num_epochs': 12}. Best is trial 13 with value: 1.0230244392652823. [I 2023-05-16 20:15:25,054] Trial 14 finished with value: 1.1263353581741014 and parameters: {'embedding_dim': 5, 'step_size': 0.22060040200689754, 'batch_size': 256, 'num_knots': 12, 'num_epochs': 12}. Best is trial 13 with value: 1.0230244392652823. [I 2023-05-16 20:22:02,080] Trial 15 finished with value: 1.0999773131545263 and parameters: {'embedding_dim': 6, 'step_size': 0.18312582558712856, 'batch_size': 256, 'num_knots': 13, 'num_epochs': 13}. Best is trial 13 with value: 1.0230244392652823. [I 2023-05-16 20:28:42,275] Trial 16 finished with value: 1.0410950936336398 and parameters: {'embedding_dim': 10, 'step_size': 0.1475792084364477, 'batch_size': 247, 'num_knots': 12, 'num_epochs': 13}. Best is trial 13 with value: 1.0230244392652823. [I 2023-05-16 20:30:11,072] Trial 17 pruned. 
[I 2023-05-16 20:42:32,391] Trial 18 finished with value: 0.9837241670350367 and parameters: {'embedding_dim': 10, 'step_size': 0.11027358472172152, 'batch_size': 101, 'num_knots': 8, 'num_epochs': 13}. Best is trial 18 with value: 0.9837241670350367. [I 2023-05-16 20:52:31,862] Trial 19 finished with value: 1.0135361538325767 and parameters: {'embedding_dim': 9, 'step_size': 0.09653067289462239, 'batch_size': 106, 'num_knots': 8, 'num_epochs': 11}. Best is trial 18 with value: 0.9837241670350367. [I 2023-05-16 21:02:01,178] Trial 20 finished with value: 1.0474427804363677 and parameters: {'embedding_dim': 9, 'step_size': 0.08886733479770562, 'batch_size': 100, 'num_knots': 9, 'num_epochs': 10}. Best is trial 18 with value: 0.9837241670350367. [I 2023-05-16 21:03:17,069] Trial 21 pruned. [I 2023-05-16 21:12:46,324] Trial 22 finished with value: 0.9591751996469187 and parameters: {'embedding_dim': 8, 'step_size': 0.2688719862751585, 'batch_size': 77, 'num_knots': 16, 'num_epochs': 11}. Best is trial 22 with value: 0.9591751996469187. [I 2023-05-16 21:21:46,922] Trial 23 pruned. [I 2023-05-16 21:32:40,408] Trial 24 finished with value: 1.013309950705173 and parameters: {'embedding_dim': 10, 'step_size': 0.1381692732684226, 'batch_size': 122, 'num_knots': 20, 'num_epochs': 14}. Best is trial 22 with value: 0.9591751996469187. [I 2023-05-16 21:44:32,521] Trial 25 finished with value: 0.9766103503901942 and parameters: {'embedding_dim': 10, 'step_size': 0.30666487353085764, 'batch_size': 120, 'num_knots': 22, 'num_epochs': 14}. Best is trial 22 with value: 0.9591751996469187. [I 2023-05-16 21:57:07,523] Trial 26 finished with value: 0.9631696687473242 and parameters: {'embedding_dim': 8, 'step_size': 0.30221758573737095, 'batch_size': 74, 'num_knots': 26, 'num_epochs': 14}. Best is trial 22 with value: 0.9591751996469187. [I 2023-05-16 22:08:48,665] Trial 27 pruned. [I 2023-05-16 22:22:53,824] Trial 28 pruned. [I 2023-05-16 22:27:42,916] Trial 29 pruned. 
[I 2023-05-16 22:33:49,484] Trial 30 pruned. [I 2023-05-16 22:44:19,069] Trial 31 finished with value: 0.9559326717458044 and parameters: {'embedding_dim': 10, 'step_size': 0.1845435818497936, 'batch_size': 83, 'num_knots': 15, 'num_epochs': 13}. Best is trial 31 with value: 0.9559326717458044. [I 2023-05-16 22:45:30,091] Trial 32 pruned. [I 2023-05-16 22:59:50,871] Trial 33 finished with value: 0.9604147753141788 and parameters: {'embedding_dim': 10, 'step_size': 0.288261536780601, 'batch_size': 57, 'num_knots': 22, 'num_epochs': 13}. Best is trial 31 with value: 0.9559326717458044. [I 2023-05-16 23:15:48,223] Trial 34 finished with value: 0.9742608985868826 and parameters: {'embedding_dim': 8, 'step_size': 0.17867376960796208, 'batch_size': 59, 'num_knots': 26, 'num_epochs': 13}. Best is trial 31 with value: 0.9559326717458044. [I 2023-05-16 23:37:28,738] Trial 35 finished with value: 0.9171861274877934 and parameters: {'embedding_dim': 9, 'step_size': 0.2645542020430195, 'batch_size': 32, 'num_knots': 19, 'num_epochs': 10}. Best is trial 35 with value: 0.9171861274877934. [I 2023-05-16 23:55:05,064] Trial 36 finished with value: 1.0132628334036484 and parameters: {'embedding_dim': 9, 'step_size': 0.40545523268469424, 'batch_size': 35, 'num_knots': 20, 'num_epochs': 9}. Best is trial 35 with value: 0.9171861274877934. [I 2023-05-17 00:05:42,393] Trial 37 finished with value: 0.9345916139732143 and parameters: {'embedding_dim': 10, 'step_size': 0.25690624184860333, 'batch_size': 59, 'num_knots': 14, 'num_epochs': 10}. Best is trial 35 with value: 0.9171861274877934. [I 2023-05-17 00:20:05,720] Trial 38 finished with value: 0.9352145948828553 and parameters: {'embedding_dim': 9, 'step_size': 0.22319832065362558, 'batch_size': 49, 'num_knots': 14, 'num_epochs': 10}. Best is trial 35 with value: 0.9171861274877934. [I 2023-05-17 00:26:44,288] Trial 39 pruned. 
[I 2023-05-17 00:41:30,166] Trial 40 finished with value: 0.9663647234913916 and parameters: {'embedding_dim': 9, 'step_size': 0.15557203352959853, 'batch_size': 48, 'num_knots': 18, 'num_epochs': 10}. Best is trial 35 with value: 0.9171861274877934. [I 2023-05-17 00:49:39,157] Trial 41 finished with value: 0.9422524159401244 and parameters: {'embedding_dim': 10, 'step_size': 0.24528852921151278, 'batch_size': 63, 'num_knots': 10, 'num_epochs': 8}. Best is trial 35 with value: 0.9171861274877934. [I 2023-05-17 00:58:35,285] Trial 42 finished with value: 0.9372499450353899 and parameters: {'embedding_dim': 10, 'step_size': 0.2395050154883088, 'batch_size': 57, 'num_knots': 10, 'num_epochs': 8}. Best is trial 35 with value: 0.9171861274877934. [I 2023-05-17 01:06:42,836] Trial 43 finished with value: 0.9408245901462334 and parameters: {'embedding_dim': 10, 'step_size': 0.24135658451497335, 'batch_size': 64, 'num_knots': 10, 'num_epochs': 8}. Best is trial 35 with value: 0.9171861274877934. [I 2023-05-17 01:18:11,857] Trial 44 finished with value: 0.9583412080247737 and parameters: {'embedding_dim': 9, 'step_size': 0.39856936883703, 'batch_size': 42, 'num_knots': 10, 'num_epochs': 8}. Best is trial 35 with value: 0.9171861274877934. [I 2023-05-17 01:25:08,180] Trial 45 pruned. [I 2023-05-17 01:30:13,286] Trial 46 finished with value: 0.9433470225660775 and parameters: {'embedding_dim': 9, 'step_size': 0.3592001787790354, 'batch_size': 63, 'num_knots': 6, 'num_epochs': 6}. Best is trial 35 with value: 0.9171861274877934. [I 2023-05-17 01:42:03,625] Trial 47 finished with value: 0.931486584866254 and parameters: {'embedding_dim': 10, 'step_size': 0.225118064987353, 'batch_size': 43, 'num_knots': 14, 'num_epochs': 9}. Best is trial 35 with value: 0.9171861274877934. [I 2023-05-17 01:54:11,688] Trial 48 finished with value: 0.9828667060204228 and parameters: {'embedding_dim': 10, 'step_size': 0.45874685485020067, 'batch_size': 42, 'num_knots': 14, 'num_epochs': 9}. 
Best is trial 35 with value: 0.9171861274877934. [I 2023-05-17 01:58:13,715] Trial 49 pruned. [I 2023-05-17 02:09:46,741] Trial 50 finished with value: 0.9406328625524666 and parameters: {'embedding_dim': 9, 'step_size': 0.20765904624110154, 'batch_size': 50, 'num_knots': 13, 'num_epochs': 10}. Best is trial 35 with value: 0.9171861274877934. [I 2023-05-17 02:21:07,688] Trial 51 finished with value: 0.9405742149964395 and parameters: {'embedding_dim': 9, 'step_size': 0.1969866703171254, 'batch_size': 51, 'num_knots': 13, 'num_epochs': 10}. Best is trial 35 with value: 0.9171861274877934. [I 2023-05-17 02:36:19,135] Trial 52 finished with value: 0.8842285075359627 and parameters: {'embedding_dim': 10, 'step_size': 0.2645915336156327, 'batch_size': 33, 'num_knots': 5, 'num_epochs': 9}. Best is trial 52 with value: 0.8842285075359627. [I 2023-05-17 02:50:02,113] Trial 53 finished with value: 0.8876547893343609 and parameters: {'embedding_dim': 10, 'step_size': 0.2727717956425, 'batch_size': 36, 'num_knots': 5, 'num_epochs': 9}. Best is trial 52 with value: 0.8842285075359627. [I 2023-05-17 03:04:02,753] Trial 54 finished with value: 0.8829253470615361 and parameters: {'embedding_dim': 10, 'step_size': 0.2762817857390965, 'batch_size': 35, 'num_knots': 4, 'num_epochs': 9}. Best is trial 54 with value: 0.8829253470615361. [I 2023-05-17 03:19:30,440] Trial 55 finished with value: 0.8949441081981341 and parameters: {'embedding_dim': 10, 'step_size': 0.2824114142172617, 'batch_size': 32, 'num_knots': 5, 'num_epochs': 9}. Best is trial 54 with value: 0.8829253470615361. [I 2023-05-17 03:34:35,882] Trial 56 finished with value: 0.8915999914951418 and parameters: {'embedding_dim': 10, 'step_size': 0.40545429992131593, 'batch_size': 33, 'num_knots': 3, 'num_epochs': 9}. Best is trial 54 with value: 0.8829253470615361. 
[I 2023-05-17 03:45:22,948] Trial 57 finished with value: 0.9174837819882632 and parameters: {'embedding_dim': 10, 'step_size': 0.42181265669918006, 'batch_size': 36, 'num_knots': 5, 'num_epochs': 7}. Best is trial 54 with value: 0.8829253470615361. [I 2023-05-17 04:01:08,351] Trial 58 finished with value: 0.8812943291843862 and parameters: {'embedding_dim': 10, 'step_size': 0.36073913832745863, 'batch_size': 32, 'num_knots': 3, 'num_epochs': 9}. Best is trial 58 with value: 0.8812943291843862. [I 2023-05-17 04:02:48,388] Trial 59 pruned. [I 2023-05-17 04:03:31,415] Trial 60 pruned. [I 2023-05-17 04:18:57,676] Trial 61 finished with value: 0.8945438434097501 and parameters: {'embedding_dim': 9, 'step_size': 0.2875851804909289, 'batch_size': 33, 'num_knots': 6, 'num_epochs': 9}. Best is trial 58 with value: 0.8812943291843862. [I 2023-05-17 04:34:24,387] Trial 62 finished with value: 0.9080907608582166 and parameters: {'embedding_dim': 10, 'step_size': 0.2859064435511358, 'batch_size': 33, 'num_knots': 6, 'num_epochs': 9}. Best is trial 58 with value: 0.8812943291843862. [I 2023-05-17 04:46:47,936] Trial 63 finished with value: 0.923213213745785 and parameters: {'embedding_dim': 10, 'step_size': 0.3574927238399868, 'batch_size': 41, 'num_knots': 8, 'num_epochs': 9}. Best is trial 58 with value: 0.8812943291843862. [I 2023-05-17 05:02:15,375] Trial 64 finished with value: 0.9303926950883815 and parameters: {'embedding_dim': 9, 'step_size': 0.4984824913019317, 'batch_size': 32, 'num_knots': 5, 'num_epochs': 9}. Best is trial 58 with value: 0.8812943291843862. [I 2023-05-17 05:12:20,767] Trial 65 finished with value: 0.8836246232120878 and parameters: {'embedding_dim': 10, 'step_size': 0.2918598742790804, 'batch_size': 45, 'num_knots': 3, 'num_epochs': 8}. Best is trial 58 with value: 0.8812943291843862. [I 2023-05-17 05:13:23,409] Trial 66 pruned. 
[I 2023-05-17 05:23:17,634] Trial 67 finished with value: 0.9203880084785563 and parameters: {'embedding_dim': 10, 'step_size': 0.2978806762773378, 'batch_size': 46, 'num_knots': 7, 'num_epochs': 8}. Best is trial 58 with value: 0.8812943291843862. [I 2023-05-17 05:24:07,220] Trial 68 pruned. [I 2023-05-17 05:25:26,541] Trial 69 pruned. [I 2023-05-17 05:38:54,831] Trial 70 finished with value: 0.8885545925677477 and parameters: {'embedding_dim': 6, 'step_size': 0.3377290795161348, 'batch_size': 40, 'num_knots': 4, 'num_epochs': 8}. Best is trial 58 with value: 0.8812943291843862. [I 2023-05-17 05:53:02,508] Trial 71 finished with value: 0.898234601537262 and parameters: {'embedding_dim': 6, 'step_size': 0.32131412729525716, 'batch_size': 39, 'num_knots': 4, 'num_epochs': 8}. Best is trial 58 with value: 0.8812943291843862. [I 2023-05-17 05:55:20,996] Trial 72 pruned. [I 2023-05-17 05:57:42,907] Trial 73 pruned. [I 2023-05-17 06:09:46,746] Trial 74 finished with value: 0.9336780206801858 and parameters: {'embedding_dim': 10, 'step_size': 0.3363325348006269, 'batch_size': 38, 'num_knots': 8, 'num_epochs': 7}. Best is trial 58 with value: 0.8812943291843862. [I 2023-05-17 06:10:46,772] Trial 75 pruned. [I 2023-05-17 06:23:23,832] Trial 76 finished with value: 0.9035483242637248 and parameters: {'embedding_dim': 10, 'step_size': 0.3023068642066013, 'batch_size': 46, 'num_knots': 6, 'num_epochs': 8}. Best is trial 58 with value: 0.8812943291843862. [I 2023-05-17 06:34:00,400] Trial 77 finished with value: 0.9356443477813984 and parameters: {'embedding_dim': 8, 'step_size': 0.3707428489721444, 'batch_size': 53, 'num_knots': 8, 'num_epochs': 9}. Best is trial 58 with value: 0.8812943291843862. [I 2023-05-17 06:40:20,237] Trial 78 finished with value: 0.8893062800372801 and parameters: {'embedding_dim': 9, 'step_size': 0.27560158762743114, 'batch_size': 66, 'num_knots': 3, 'num_epochs': 10}. Best is trial 58 with value: 0.8812943291843862. 
[I 2023-05-17 06:48:23,467] Trial 79 finished with value: 0.8840000301227305 and parameters: {'embedding_dim': 10, 'step_size': 0.2621309491504174, 'batch_size': 68, 'num_knots': 3, 'num_epochs': 11}. Best is trial 58 with value: 0.8812943291843862. [I 2023-05-17 06:49:38,654] Trial 80 pruned. [I 2023-05-17 06:50:44,419] Trial 81 pruned. [I 2023-05-17 07:01:59,621] Trial 82 finished with value: 0.8794329950964015 and parameters: {'embedding_dim': 10, 'step_size': 0.24654623010907453, 'batch_size': 68, 'num_knots': 3, 'num_epochs': 12}. Best is trial 82 with value: 0.8794329950964015. [I 2023-05-17 07:03:01,386] Trial 83 pruned. [I 2023-05-17 07:04:27,287] Trial 84 pruned. [I 2023-05-17 07:05:37,113] Trial 85 pruned. [I 2023-05-17 07:06:37,147] Trial 86 pruned. [I 2023-05-17 07:07:52,641] Trial 87 pruned. [I 2023-05-17 07:10:41,882] Trial 88 pruned. [I 2023-05-17 07:12:02,122] Trial 89 pruned. [I 2023-05-17 07:13:00,456] Trial 90 pruned. [I 2023-05-17 07:25:04,550] Trial 91 finished with value: 0.8899198718915379 and parameters: {'embedding_dim': 10, 'step_size': 0.38574848982787274, 'batch_size': 37, 'num_knots': 3, 'num_epochs': 8}. Best is trial 82 with value: 0.8794329950964015. [I 2023-05-17 07:36:52,813] Trial 92 finished with value: 0.8794658998359767 and parameters: {'embedding_dim': 10, 'step_size': 0.24069912203863186, 'batch_size': 38, 'num_knots': 3, 'num_epochs': 8}. Best is trial 82 with value: 0.8794329950964015. [I 2023-05-17 07:44:42,769] Trial 93 finished with value: 0.9155397785613877 and parameters: {'embedding_dim': 10, 'step_size': 0.2388690985342639, 'batch_size': 53, 'num_knots': 6, 'num_epochs': 7}. Best is trial 82 with value: 0.8794329950964015. [I 2023-05-17 07:45:35,447] Trial 94 pruned. [I 2023-05-17 07:58:02,324] Trial 95 finished with value: 0.9002698705631274 and parameters: {'embedding_dim': 9, 'step_size': 0.22749984739763673, 'batch_size': 46, 'num_knots': 5, 'num_epochs': 10}. Best is trial 82 with value: 0.8794329950964015. 
[I 2023-05-17 08:00:11,700] Trial 96 pruned. [I 2023-05-17 08:08:32,642] Trial 97 finished with value: 0.8877704114380222 and parameters: {'embedding_dim': 9, 'step_size': 0.2560666131932691, 'batch_size': 71, 'num_knots': 3, 'num_epochs': 11}. Best is trial 82 with value: 0.8794329950964015. [I 2023-05-17 08:09:38,192] Trial 98 pruned. [I 2023-05-17 08:14:32,944] Trial 99 pruned.
# Report the best spline trial. Its value is what the objective returned, i.e. the square
# root of the loss produced by train_spline_fm.
trial = study.best_trial
print(f'Test loss: {trial.value}')
print(f"Best hyperparameters: {trial.params}")
Test loss: 0.8794329950964015 Best hyperparameters: {'embedding_dim': 10, 'step_size': 0.24654623010907453, 'batch_size': 68, 'num_knots': 3, 'num_epochs': 12}
# Display the hyperparameters of the best spline trial.
study.best_params
{'embedding_dim': 10, 'step_size': 0.24654623010907453, 'batch_size': 68, 'num_knots': 3, 'num_epochs': 12}
# Retrain once with the best hyperparameters (no pruning callback is passed).
# The displayed value is train_spline_fm's raw return; unlike the Optuna objective and the
# repeated-runs loop, no math.sqrt is applied here.
train_spline_fm(**study.best_params)
0.7787345051765442
# Retrain 20 times with the best hyperparameters and keep the square root of each returned
# loss, to measure run-to-run variation. trange shows a progress bar over the repetitions.
spline_losses = [
    math.sqrt(train_spline_fm(**study.best_params))
    for _ in trange(20)
]
100%|██████████| 20/20 [2:39:59<00:00, 479.95s/it]
# Display the square-rooted losses collected over the repeated runs.
spline_losses
[0.881695640108332, 0.881986518583829, 0.8812350129821337, 0.8835068731661306, 0.8756028550892196, 0.8795998443241684, 0.8812374141146075, 0.8793456949910461, 0.879686949816137, 0.8792196435620432, 0.8800452079323017, 0.8800887905106133, 0.8787611778058435, 0.8798512104892138, 0.8808272328401603, 0.8801772358534317, 0.8815416286395354, 0.8799189857558083, 0.8787488669310817, 0.8843083494702224]
# Mean, standard deviation, and standard deviation as a percentage of the mean
# of the repeated-run losses.
np.mean(spline_losses), np.std(spline_losses), 100 * np.std(spline_losses) / np.mean(spline_losses)
(0.880369256648293, 0.0018009996668723587, 0.2045732121234053)
def train_bin_fm(embedding_dim: int, step_size: float, batch_size: int,
                 num_bins: int, bin_strategy: str, num_epochs: int,
                 callback: Callable[[int, float], None]=None):
    """Train an FM model on binned (discretized) features and return the trainer's result.

    Every feature column of the module-level ``tr_feats`` / ``te_feats`` frames is
    discretized into ordinal bin ids with a ``KBinsDiscretizer`` fitted on the training
    features only. Bin ids are shifted per column so each field addresses its own slice
    of the embedding table, wrapped with the scaled targets in ``TensorDataset``
    objects, and handed to ``FMTrainer``.

    Args:
        embedding_dim: Forwarded to ``FMTrainer``.
        step_size: Forwarded to ``FMTrainer``.
        batch_size: Forwarded to ``FMTrainer``.
        num_bins: Number of bins requested per feature; also fixes the embedding table
            size at ``num_fields * num_bins``.
        bin_strategy: ``strategy`` argument of ``KBinsDiscretizer`` (this file samples
            it from ``'uniform'`` and ``'quantile'``).
        num_epochs: Forwarded to ``FMTrainer``.
        callback: Optional callable forwarded to ``FMTrainer``; by its annotation it
            takes ``(int, float)`` - in this file it is used as ``(epoch, loss)``.

    Returns:
        Whatever ``FMTrainer.train`` returns. Callers in this file take ``math.sqrt``
        of it, so it is presumably a scalar MSE loss on the test dataset - TODO confirm.
    """
    num_fields = tr_feats.shape[1]
    num_embeddings = num_fields * num_bins
    # Field f owns the embedding-index range [f * num_bins, (f + 1) * num_bins).
    index_offsets = np.arange(num_fields) * num_bins

    # subsample=None disables subsampling explicitly. This keeps the behaviour of the
    # recorded runs (which only emitted a FutureWarning) and prevents newer scikit-learn
    # releases from silently fitting the bin edges on a 200k-row subsample.
    discretizer = KBinsDiscretizer(num_bins, encode='ordinal', strategy=bin_strategy,
                                   subsample=None, random_state=42)
    discretizer.fit(tr_feats)

    def build_dataset(feats, target):
        # Ordinal bin id per (row, field), shifted into that field's index range.
        # Broadcasting adds the per-column offset to every row.
        indices = discretizer.transform(feats) + index_offsets
        # Each row activates exactly one bin per field, with unit weight.
        weights = np.ones_like(indices)
        fields = np.tile(np.arange(num_fields), (indices.shape[0], 1))
        # Offsets are a copy of the field ids (0..num_fields-1 in every row).
        # NOTE(review): confirm this is the offsets layout FMTrainer expects.
        offsets = fields.copy()
        return TensorDataset(
            torch.tensor(indices, dtype=torch.int64),
            torch.tensor(weights, dtype=torch.float32),
            torch.tensor(offsets, dtype=torch.int64),
            torch.tensor(fields, dtype=torch.int64),
            torch.tensor(target, dtype=torch.float32))

    train_ds = build_dataset(tr_feats, tr_target)
    test_ds = build_dataset(te_feats, te_target)

    trainer = FMTrainer(embedding_dim, step_size, batch_size, num_epochs, callback)
    return trainer.train(num_fields, num_embeddings, train_ds, test_ds, torch.nn.MSELoss(), device)
def test_bins_objective(trial: optuna.Trial):
    """Optuna objective for the binned model.

    Samples one hyperparameter configuration from ``trial``, trains with
    ``train_bin_fm``, and returns the square root of the value it returns.
    After each callback invocation the square-rooted loss is reported to the trial,
    and ``optuna.TrialPruned`` is raised if the trial should be pruned.
    """
    # The suggest calls are kept in this exact order so a seeded sampler produces the
    # same sequence of configurations.
    hparams = {
        'embedding_dim': trial.suggest_int('embedding_dim', 1, 10),
        'step_size': trial.suggest_float('step_size', 1e-2, 0.5, log=True),
        'batch_size': trial.suggest_int('batch_size', 32, 256),
        'num_bins': trial.suggest_int('num_bins', 2, 100),
        'bin_strategy': trial.suggest_categorical('bin_strategy', ['uniform', 'quantile']),
        'num_epochs': trial.suggest_int('num_epochs', 5, 15),
    }

    def report_and_maybe_prune(epoch: int, loss: float):
        # Report on the same scale as the objective's return value (square root of the loss).
        trial.report(math.sqrt(loss), epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

    final_loss = train_bin_fm(**hparams, callback=report_and_maybe_prune)
    return math.sqrt(final_loss)
# In-memory study for the binned model. The objective value is minimized, and the TPE
# sampler is seeded so the sequence of suggested configurations is repeatable.
study_bins = optuna.create_study(
    sampler=optuna.samplers.TPESampler(seed=42),
    direction='minimize',
    study_name='bins',
)
# Run 100 trials of the bins objective.
study_bins.optimize(func=test_bins_objective, n_trials=100)
[I 2023-05-17 08:23:55,739] A new study created in memory with name: bins [I 2023-05-17 08:25:38,113] Trial 0 finished with value: 1.5271531398304155 and parameters: {'embedding_dim': 4, 'step_size': 0.4123206532618726, 'batch_size': 196, 'num_bins': 61, 'bin_strategy': 'uniform', 'num_epochs': 5}. Best is trial 0 with value: 1.5271531398304155. [I 2023-05-17 08:28:04,884] Trial 1 finished with value: 0.9448509350530763 and parameters: {'embedding_dim': 9, 'step_size': 0.10502105436744279, 'batch_size': 191, 'num_bins': 4, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 1 with value: 0.9448509350530763. [I 2023-05-17 08:34:45,404] Trial 2 finished with value: 5.603986016331611 and parameters: {'embedding_dim': 2, 'step_size': 0.020492680115417352, 'batch_size': 100, 'num_bins': 53, 'bin_strategy': 'uniform', 'num_epochs': 11}. Best is trial 1 with value: 0.9448509350530763. [I 2023-05-17 08:40:16,860] Trial 3 finished with value: 3.487825336116525 and parameters: {'embedding_dim': 2, 'step_size': 0.03135775732257745, 'batch_size': 114, 'num_bins': 47, 'bin_strategy': 'uniform', 'num_epochs': 10}. Best is trial 1 with value: 0.9448509350530763. /usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:209: FutureWarning: In version 1.3 onwards, subsample=2e5 will be used by default. Set subsample explicitly to silence this warning in the mean time. Set subsample=None to disable subsampling explicitly. warnings.warn( [I 2023-05-17 08:45:51,720] Trial 4 finished with value: 5.078675976720081 and parameters: {'embedding_dim': 6, 'step_size': 0.011992724522955167, 'batch_size': 168, 'num_bins': 18, 'bin_strategy': 'quantile', 'num_epochs': 15}. Best is trial 1 with value: 0.9448509350530763. [I 2023-05-17 08:56:25,390] Trial 5 finished with value: 1.5473596603529345 and parameters: {'embedding_dim': 9, 'step_size': 0.032925293631105246, 'batch_size': 53, 'num_bins': 69, 'bin_strategy': 'uniform', 'num_epochs': 10}. 
Best is trial 1 with value: 0.9448509350530763. /usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:209: FutureWarning: In version 1.3 onwards, subsample=2e5 will be used by default. Set subsample explicitly to silence this warning in the mean time. Set subsample=None to disable subsampling explicitly. warnings.warn( [I 2023-05-17 09:03:33,049] Trial 6 finished with value: 1.2903412458488763 and parameters: {'embedding_dim': 1, 'step_size': 0.35067764992972184, 'batch_size': 90, 'num_bins': 67, 'bin_strategy': 'quantile', 'num_epochs': 11}. Best is trial 1 with value: 0.9448509350530763. [I 2023-05-17 09:08:53,444] Trial 7 finished with value: 1.204044238579979 and parameters: {'embedding_dim': 2, 'step_size': 0.4439102767051397, 'batch_size': 206, 'num_bins': 95, 'bin_strategy': 'uniform', 'num_epochs': 15}. Best is trial 1 with value: 0.9448509350530763. [I 2023-05-17 09:10:11,410] Trial 8 pruned. [I 2023-05-17 09:10:36,489] Trial 9 pruned. /usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:209: FutureWarning: In version 1.3 onwards, subsample=2e5 will be used by default. Set subsample explicitly to silence this warning in the mean time. Set subsample=None to disable subsampling explicitly. warnings.warn( [I 2023-05-17 09:12:05,260] Trial 10 finished with value: 1.3519945281047807 and parameters: {'embedding_dim': 10, 'step_size': 0.12983523609376665, 'batch_size': 250, 'num_bins': 3, 'bin_strategy': 'quantile', 'num_epochs': 5}. Best is trial 1 with value: 0.9448509350530763. [I 2023-05-17 09:14:21,947] Trial 11 finished with value: 1.5510122285726062 and parameters: {'embedding_dim': 8, 'step_size': 0.1376872218807011, 'batch_size': 216, 'num_bins': 100, 'bin_strategy': 'uniform', 'num_epochs': 7}. Best is trial 1 with value: 0.9448509350530763. 
[W 2023-05-17 09:14:22,281] Trial 12 failed with parameters: {'embedding_dim': 7, 'step_size': 0.21367114396906817, 'batch_size': 202, 'num_bins': 95, 'bin_strategy': 'uniform', 'num_epochs': 8} because of the following error: KeyboardInterrupt(). Traceback (most recent call last): File "/usr/local/lib/python3.9/site-packages/optuna/study/_optimize.py", line 200, in _run_trial value_or_values = func(trial) File "/tmp/ipykernel_258955/2787357413.py", line 14, in test_bins_objective return math.sqrt(train_bin_fm(embedding_dim, step_size, batch_size, num_bins, bin_strategy, num_epochs, File "/tmp/ipykernel_258955/1864156579.py", line 11, in train_bin_fm tr_indices = discretizer.transform(tr_feats) File "/usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py", line 375, in transform Xt[:, jj] = np.searchsorted(bin_edges[jj][1:-1], Xt[:, jj], side="right") File "<__array_function__ internals>", line 180, in searchsorted File "/usr/local/lib64/python3.9/site-packages/numpy/core/fromnumeric.py", line 1387, in searchsorted return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter) File "/usr/local/lib64/python3.9/site-packages/numpy/core/fromnumeric.py", line 57, in _wrapfunc return bound(*args, **kwds) KeyboardInterrupt [W 2023-05-17 09:14:22,283] Trial 12 failed with value None.
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) Input In [21], in <cell line: 4>() 1 study_bins = optuna.create_study(study_name='bins', 2 direction='minimize', 3 sampler=optuna.samplers.TPESampler(seed=42)) ----> 4 study_bins.optimize(test_bins_objective, n_trials=100) File /usr/local/lib/python3.9/site-packages/optuna/study/study.py:425, in Study.optimize(self, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar) 321 def optimize( 322 self, 323 func: ObjectiveFuncType, (...) 330 show_progress_bar: bool = False, 331 ) -> None: 332 """Optimize an objective function. 333 334 Optimization is done by choosing a suitable set of hyperparameter values from a given (...) 422 If nested invocation of this method occurs. 423 """ --> 425 _optimize( 426 study=self, 427 func=func, 428 n_trials=n_trials, 429 timeout=timeout, 430 n_jobs=n_jobs, 431 catch=tuple(catch) if isinstance(catch, Iterable) else (catch,), 432 callbacks=callbacks, 433 gc_after_trial=gc_after_trial, 434 show_progress_bar=show_progress_bar, 435 ) File /usr/local/lib/python3.9/site-packages/optuna/study/_optimize.py:66, in _optimize(study, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar) 64 try: 65 if n_jobs == 1: ---> 66 _optimize_sequential( 67 study, 68 func, 69 n_trials, 70 timeout, 71 catch, 72 callbacks, 73 gc_after_trial, 74 reseed_sampler_rng=False, 75 time_start=None, 76 progress_bar=progress_bar, 77 ) 78 else: 79 if n_jobs == -1: File /usr/local/lib/python3.9/site-packages/optuna/study/_optimize.py:163, in _optimize_sequential(study, func, n_trials, timeout, catch, callbacks, gc_after_trial, reseed_sampler_rng, time_start, progress_bar) 160 break 162 try: --> 163 frozen_trial = _run_trial(study, func, catch) 164 finally: 165 # The following line mitigates memory problems that can be occurred in some 166 # environments (e.g., services that use computing 
containers such as GitHub Actions). 167 # Please refer to the following PR for further details: 168 # https://github.com/optuna/optuna/pull/325. 169 if gc_after_trial: File /usr/local/lib/python3.9/site-packages/optuna/study/_optimize.py:251, in _run_trial(study, func, catch) 244 assert False, "Should not reach." 246 if ( 247 frozen_trial.state == TrialState.FAIL 248 and func_err is not None 249 and not isinstance(func_err, catch) 250 ): --> 251 raise func_err 252 return frozen_trial File /usr/local/lib/python3.9/site-packages/optuna/study/_optimize.py:200, in _run_trial(study, func, catch) 198 with get_heartbeat_thread(trial._trial_id, study._storage): 199 try: --> 200 value_or_values = func(trial) 201 except exceptions.TrialPruned as e: 202 # TODO(mamu): Handle multi-objective cases. 203 state = TrialState.PRUNED Input In [20], in test_bins_objective(trial) 11 if trial.should_prune(): 12 raise optuna.TrialPruned() ---> 14 return math.sqrt(train_bin_fm(embedding_dim, step_size, batch_size, num_bins, bin_strategy, num_epochs, 15 callback=callback)) Input In [19], in train_bin_fm(embedding_dim, step_size, batch_size, num_bins, bin_strategy, num_epochs, callback) 8 discretizer = KBinsDiscretizer(num_bins, encode='ordinal', strategy=bin_strategy, random_state=42) 9 discretizer.fit(tr_feats) ---> 11 tr_indices = discretizer.transform(tr_feats) 12 tr_indices += np.tile(index_offsets, (tr_indices.shape[0], 1)) 13 tr_weights = np.ones_like(tr_indices) File /usr/local/lib64/python3.9/site-packages/sklearn/preprocessing/_discretization.py:375, in KBinsDiscretizer.transform(self, X) 373 bin_edges = self.bin_edges_ 374 for jj in range(Xt.shape[1]): --> 375 Xt[:, jj] = np.searchsorted(bin_edges[jj][1:-1], Xt[:, jj], side="right") 377 if self.encode == "ordinal": 378 return Xt File <__array_function__ internals>:180, in searchsorted(*args, **kwargs) File /usr/local/lib64/python3.9/site-packages/numpy/core/fromnumeric.py:1387, in searchsorted(a, v, side, sorter) 1319 
@array_function_dispatch(_searchsorted_dispatcher) 1320 def searchsorted(a, v, side='left', sorter=None): 1321 """ 1322 Find indices where elements should be inserted to maintain order. 1323 (...) 1385 1386 """ -> 1387 return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter) File /usr/local/lib64/python3.9/site-packages/numpy/core/fromnumeric.py:57, in _wrapfunc(obj, method, *args, **kwds) 54 return _wrapit(obj, method, *args, **kwds) 56 try: ---> 57 return bound(*args, **kwds) 58 except TypeError: 59 # A TypeError occurs if the object does have such a method in its 60 # class, but its signature is not identical to that of NumPy's. This (...) 64 # Call _wrapit from within the except clause to ensure a potential 65 # exception has a traceback chain. 66 return _wrapit(obj, method, *args, **kwds) KeyboardInterrupt:
# Notebook-style display of the best hyperparameters recorded by `study_bins`.
study_bins.best_params

# Report the best trial: its objective value and the parameters that produced it.
trial = study_bins.best_trial
loss_line = 'Test loss: {}'.format(trial.value)
params_line = "Best hyperparameters: {}".format(trial.params)
print(loss_line, params_line, sep="\n")

# Retrain once with the best hyperparameters; the returned value is left as a
# bare expression (shown by the notebook, discarded when run as a script).
train_bin_fm(**study_bins.best_params)
# Retrain 20 times with the best hyperparameters and collect the square root
# of each returned loss (presumably an RMSE if train_bin_fm returns a mean
# squared error -- confirm against its definition).
bin_losses = []
for _ in trange(20):
    # Append per run so results gathered so far survive an interrupted loop.
    bin_losses.append(math.sqrt(train_bin_fm(**study_bins.best_params)))

# Notebook-style displays: the individual values, then their mean and
# standard deviation (np.std default, i.e. ddof=0).
bin_losses
(np.mean(bin_losses), np.std(bin_losses))