In [1]:
# declare a list of tasks whose products you want to use as inputs
upstream = None
In [2]:
# Parameters
product = {"nb": "/home/ubuntu/vitmtsc_nbdev/output/601_benchmark_rocket.html"}
In [3]:
#| default_exp benchmark.rocket
%load_ext autoreload
%autoreload 2
In [4]:
#| hide
from nbdev.showdoc import *
In [5]:
#| export

import numpy as np
from sklearn.linear_model import RidgeClassifierCV
from sklearn.pipeline import make_pipeline
from sktime.datasets import load_UCR_UEA_dataset  # loads datasets from the UCR/UEA archive (incl. multivariate)
from sktime.transformations.panel.rocket import Rocket
from sktime.transformations.panel.padder import PaddingTransformer

def evaluate_rocket(dataset_name, max_length):
    """Fit a Rocket pipeline on a UCR/UEA dataset and print its test accuracy.

    `max_length` pads every series in the (possibly unequal-length) dataset
    to a common length before the Rocket transform.
    """
    # Rocket features followed by a ridge classifier (the standard Rocket setup).
    # NOTE: normalize=True is deprecated in scikit-learn 1.0 and removed in 1.2;
    # see the FutureWarning in the output below for the recommended replacement.
    rocket_pipeline = make_pipeline(
        PaddingTransformer(max_length),
        Rocket(),
        RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True),
    )

    X_train, y_train = load_UCR_UEA_dataset(
        name=dataset_name, split="train", return_X_y=True
    )

    # y_train is ignored by the transformers but required by the classifier
    rocket_pipeline.fit(X_train, y_train)

    X_test, y_test = load_UCR_UEA_dataset(
        name=dataset_name, split="test", return_X_y=True
    )

    print(f"Rocket/{dataset_name}: {rocket_pipeline.score(X_test, y_test)}")
In [6]:
%%time
evaluate_rocket("FaceDetection", 62)
/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/sktime/datasets/_data_io.py:928: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead.  To get a de-fragmented frame, use `newframe = frame.copy()`
  data["dim_" + str(dim)] = instance_list[dim]
/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/sklearn/linear_model/_base.py:141: FutureWarning: 'normalize' was deprecated in version 1.0 and will be removed in 1.2.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alphas to: original_alphas * n_samples. 
  warnings.warn(
Rocket/FaceDetection: 0.6359250851305335
CPU times: user 1h 3min 57s, sys: 3min 23s, total: 1h 7min 20s
Wall time: 58min 40s
In [7]:
%%time
evaluate_rocket("InsectWingbeat", 22)
Rocket/InsectWingbeat: 0.53716
CPU times: user 16h 13min 23s, sys: 1h 40min 27s, total: 17h 53min 50s
Wall time: 2h 40min 11s
In [8]:
%%time
evaluate_rocket("PenDigits", 8)
Rocket/PenDigits: 0.9777015437392796
CPU times: user 11min 22s, sys: 3min 48s, total: 15min 11s
Wall time: 1min 21s
In [9]:
%%time
evaluate_rocket("SpokenArabicDigits", 93)
Rocket/SpokenArabicDigits: 0.9977262391996362
CPU times: user 19min 47s, sys: 3min 11s, total: 22min 59s
Wall time: 12min 46s
In [10]:
%%time
evaluate_rocket("CharacterTrajectories", 182)
Rocket/CharacterTrajectories: 0.9930362116991643
CPU times: user 4min 1s, sys: 37.8 s, total: 4min 39s
Wall time: 3min 27s
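
For reference, the five runs above summarize as follows (test accuracy as printed by evaluate_rocket, wall time from %%time):

Dataset                  Pad length   Test accuracy   Wall time
FaceDetection                    62          0.6359   58min 40s
InsectWingbeat                   22          0.5372   2h 40min 11s
PenDigits                         8          0.9777   1min 21s
SpokenArabicDigits               93          0.9977   12min 46s
CharacterTrajectories           182          0.9930   3min 27s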
In [11]:
from nbdev import nbdev_export
nbdev_export()