#| default_exp feature_preprocessing.character_trajectories.tabular_to_timeseries
%load_ext autoreload
%autoreload 2
# Declare the list of upstream tasks whose products this notebook uses as inputs.
upstream = ['feature_preprocessing_character_trajectories']
# Parameters
# NOTE(review): `upstream` is rebound below from the task-name list above to a
# dict mapping the upstream task to its product paths, and `product` holds this
# notebook's own output paths. These look like pipeline-injected values —
# confirm before editing them by hand.
upstream = {"feature_preprocessing_character_trajectories": {"nb": "/home/ubuntu/vitmtsc_nbdev/output/205_feature_preprocessing.character_trajectories.target_encoding.html", "CharacterTrajectories_TRAIN_TE": "/home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/target_encoding/train", "CharacterTrajectories_VALID_TE": "/home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/target_encoding/valid", "CharacterTrajectories_TEST_TE": "/home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/target_encoding/test", "CharacterTrajectories_workflow_dir": "/home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/target_encoding/nvtabular_workflow"}}
product = {"nb": "/home/ubuntu/vitmtsc_nbdev/output/305_feature_preprocessing.character_trajectories.tabular_to_timeseries.html", "CharacterTrajectories_TRAIN_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/target_encoding-nn/train", "CharacterTrajectories_VALID_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/target_encoding-nn/valid", "CharacterTrajectories_TEST_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/target_encoding-nn/test"}
#| hide
from nbdev.showdoc import *
#| export
from vitmtsc import *
from vitmtsc.core import *
from vitmtsc.data.character_trajectories import *
from vitmtsc.feature_preprocessing.character_trajectories.target_encoding import *
import os
# |export
# Paths of the artifacts produced by the upstream target-encoding task,
# keyed first by task name and then by product name.
upstream = {
    "feature_preprocessing_character_trajectories": {
        "nb": "/home/ubuntu/vitmtsc_nbdev/output/205_feature_preprocessing.character_trajectories.target_encoding.html",
        "CharacterTrajectories_TRAIN_TE": "/home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/target_encoding/train",
        "CharacterTrajectories_VALID_TE": "/home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/target_encoding/valid",
        "CharacterTrajectories_TEST_TE": "/home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/target_encoding/test",
        "CharacterTrajectories_workflow_dir": "/home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/target_encoding/nvtabular_workflow",
    }
}

# Paths of the artifacts this notebook itself produces, keyed by product name.
product = {
    "nb": "/home/ubuntu/vitmtsc_nbdev/output/305_feature_preprocessing.character_trajectories.tabular_to_timeseries.html",
    "CharacterTrajectories_TRAIN_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/target_encoding-nn/train",
    "CharacterTrajectories_VALID_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/target_encoding-nn/valid",
    "CharacterTrajectories_TEST_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/target_encoding-nn/test",
}
Convert Target Encoding data from tabular to time-series format
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
# Start a local multi-GPU Dask cluster and attach a client to it.
cluster = LocalCUDACluster(
    memory_limit='auto',        # host-memory limit per worker
    device_memory_limit=0.5,    # device-memory limit per worker (a float is a fraction of GPU memory per dask-cuda docs)
    rmm_pool_size='20GB',       # RMM memory pool size per worker
    rmm_managed_memory=True,    # have RMM use CUDA managed memory
)
client = Client(cluster)
# Leave the client as the cell's last expression so its summary is displayed.
client
2022-09-23 19:02:06,330 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize 2022-09-23 19:02:06,330 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize 2022-09-23 19:02:06,421 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize 2022-09-23 19:02:06,421 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize 2022-09-23 19:02:06,426 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize 2022-09-23 19:02:06,426 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize 2022-09-23 19:02:06,437 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize 2022-09-23 19:02:06,437 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize
Client-380c9110-3b72-11ed-8103-0a4f0fdf7975
Connection method: Cluster object | Cluster type: dask_cuda.LocalCUDACluster |
Dashboard: http://127.0.0.1:8787/status |
5d00441e
Dashboard: http://127.0.0.1:8787/status | Workers: 4 |
Total threads: 4 | Total memory: 150.00 GiB |
Status: running | Using processes: True |
Scheduler-0e2624ee-1240-4fd7-b571-d8c018eba423
Comm: tcp://127.0.0.1:39481 | Workers: 4 |
Dashboard: http://127.0.0.1:8787/status | Total threads: 4 |
Started: Just now | Total memory: 150.00 GiB |
Comm: tcp://127.0.0.1:44667 | Total threads: 1 |
Dashboard: http://127.0.0.1:46767/status | Memory: 37.50 GiB |
Nanny: tcp://127.0.0.1:33709 | |
Local directory: /tmp/dask-worker-space/worker-isaxfxlx | |
GPU: Tesla T4 | GPU memory: 14.76 GiB |
Comm: tcp://127.0.0.1:34099 | Total threads: 1 |
Dashboard: http://127.0.0.1:44185/status | Memory: 37.50 GiB |
Nanny: tcp://127.0.0.1:35049 | |
Local directory: /tmp/dask-worker-space/worker-_njv48sa | |
GPU: Tesla T4 | GPU memory: 14.76 GiB |
Comm: tcp://127.0.0.1:46367 | Total threads: 1 |
Dashboard: http://127.0.0.1:33237/status | Memory: 37.50 GiB |
Nanny: tcp://127.0.0.1:40065 | |
Local directory: /tmp/dask-worker-space/worker-423vni88 | |
GPU: Tesla T4 | GPU memory: 14.76 GiB |
Comm: tcp://127.0.0.1:36545 | Total threads: 1 |
Dashboard: http://127.0.0.1:42827/status | Memory: 37.50 GiB |
Nanny: tcp://127.0.0.1:44461 | |
Local directory: /tmp/dask-worker-space/worker-fy1l03qn | |
GPU: Tesla T4 | GPU memory: 14.76 GiB |
#| export
# Dataset identifier; matches the directory name used in the product paths.
DATASET_NAME = 'CharacterTrajectories'
# Number of time steps per series; passed as `seq_len` to the conversion calls
# and reflected in the flattened column suffixes `_0` .. `_181`.
SEQUENCE_LENGTH = 182
# Number of channels per time step (dim_0, dim_1, dim_2).
NUMBER_OF_FEATURES = 3
# Number of target classes — presumably the count of distinct `class_vals`
# (the splits inspected in this notebook each show 20); TODO confirm.
NUM_TARGET = 20
Convert from Tabular to Time-Series Format
#| export
# Channel columns of the multivariate time series in the tabular input.
MTSC_COLUMN_NAMES = ['dim_0', 'dim_1', 'dim_2']
#| export
# Full tabular column list: identifier columns, then the channels, then the label.
ALL_COLUMNS = ['case_id', 'case_id_seq', 'reading_id', *MTSC_COLUMN_NAMES, 'class_vals']
Input Data Location
# Look up the upstream task's products once, then resolve each split's directory.
_te_products = upstream['feature_preprocessing_character_trajectories']
target_encoded_train_dir = os.path.join("./", _te_products['CharacterTrajectories_TRAIN_TE'])
target_encoded_valid_dir = os.path.join("./", _te_products['CharacterTrajectories_VALID_TE'])
target_encoded_test_dir = os.path.join("./", _te_products['CharacterTrajectories_TEST_TE'])
Output Data Location
# Resolve the output directory of each split from this notebook's declared products.
output_train_dir = os.path.join("./", product['CharacterTrajectories_TRAIN_MODEL_INPUT'])
output_valid_dir = os.path.join("./", product['CharacterTrajectories_VALID_MODEL_INPUT'])
output_test_dir = os.path.join("./", product['CharacterTrajectories_TEST_MODEL_INPUT'])
# Create the directories (and any missing parents) up front, like `mkdir -p`,
# but without shelling out: paths containing spaces or shell metacharacters are
# handled correctly, and a real failure raises instead of being printed and ignored.
for _out_dir in (output_train_dir, output_valid_dir, output_test_dir):
    os.makedirs(_out_dir, exist_ok=True)
Tabular to Time-Series format conversion: training split
%%time
convert_from_tabular_to_timeseries_format(input_dir = target_encoded_train_dir,
output_dir = output_train_dir,
all_columns = ALL_COLUMNS,
mtsc_column_names = MTSC_COLUMN_NAMES,
chunk_size_processing = 50000,
number_of_features = NUMBER_OF_FEATURES,
seq_len = SEQUENCE_LENGTH,
chunk_size_file = 10000)
case_id_seq_min: 0 case_id_seq_max: 1421 Total number of chunks to be processed: 1 Started processing chunk: 0 with case_id_seq from : 0 to 1421 Before CumCount Min: 0 CumCount Max: 179 After CumCount Min: 0 CumCount Max: 179 sorted flattened_gdf.shape: (1137, 548) Total number of files to be created: 1 Writing to output file: /home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/target_encoding-nn/train/chunk_0_part_0.parquet with records from iloc: 0 to 1137 Finished processing chunk: 0 with case_id_seq from : 0 to 1421 CPU times: user 3.37 s, sys: 598 ms, total: 3.97 s Wall time: 5.65 s
Tabular to Time-Series format conversion: validation split
%%time
convert_from_tabular_to_timeseries_format(input_dir = target_encoded_valid_dir,
output_dir = output_valid_dir,
all_columns = ALL_COLUMNS,
mtsc_column_names = MTSC_COLUMN_NAMES,
chunk_size_processing = 50000,
number_of_features = NUMBER_OF_FEATURES,
seq_len = SEQUENCE_LENGTH,
chunk_size_file = 10000)
case_id_seq_min: 23 case_id_seq_max: 1418 Total number of chunks to be processed: 1 Started processing chunk: 0 with case_id_seq from : 0 to 1418 Before CumCount Min: 0 CumCount Max: 172 After CumCount Min: 0 CumCount Max: 172 sorted flattened_gdf.shape: (285, 548) Total number of files to be created: 1 Writing to output file: /home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/target_encoding-nn/valid/chunk_0_part_0.parquet with records from iloc: 0 to 285 Finished processing chunk: 0 with case_id_seq from : 0 to 1418 CPU times: user 740 ms, sys: 60.7 ms, total: 801 ms Wall time: 818 ms
Tabular to Time-Series format conversion: test split
%%time
convert_from_tabular_to_timeseries_format(input_dir = target_encoded_test_dir,
output_dir = output_test_dir,
all_columns = ALL_COLUMNS,
mtsc_column_names = MTSC_COLUMN_NAMES,
chunk_size_processing = 50000,
number_of_features = NUMBER_OF_FEATURES,
seq_len = SEQUENCE_LENGTH,
chunk_size_file = 10000)
case_id_seq_min: 0 case_id_seq_max: 1435 Total number of chunks to be processed: 1 Started processing chunk: 0 with case_id_seq from : 0 to 1435 Before CumCount Min: 0 CumCount Max: 181 After CumCount Min: 0 CumCount Max: 181 sorted flattened_gdf.shape: (1436, 548) Total number of files to be created: 1 Writing to output file: /home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/target_encoding-nn/test/chunk_0_part_0.parquet with records from iloc: 0 to 1436 Finished processing chunk: 0 with case_id_seq from : 0 to 1435 CPU times: user 800 ms, sys: 77.8 ms, total: 878 ms Wall time: 898 ms
%%time
# Sanity check: read back the parquet output written for the training split
# and display its first rows.
import dask_cudf
train_gdf = dask_cudf.read_parquet(output_train_dir)
train_gdf.head()
CPU times: user 321 ms, sys: 20.1 ms, total: 341 ms Wall time: 485 ms
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_2_174 | dim_2_175 | dim_2_176 | dim_2_177 | dim_2_178 | dim_2_179 | dim_2_180 | dim_2_181 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -0.107928 | -0.101998 | -0.100830 | -0.107980 | -0.125284 | -0.153670 | -0.193613 | -0.243723 | -0.299566 | -0.354739 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 13.0 | 954.0 |
1 | -0.241938 | -0.282187 | -0.316187 | -0.344641 | -0.373353 | -0.409349 | -0.457755 | -0.520926 | -0.598919 | -0.690191 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 5.0 | 400.0 |
2 | -0.285200 | -0.343543 | -0.400658 | -0.459077 | -0.524088 | -0.599595 | -0.686902 | -0.785901 | -0.896377 | -1.018127 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 | 251.0 |
3 | -0.258288 | -0.268151 | -0.247131 | -0.196936 | -0.124711 | -0.037351 | 0.060679 | 0.167194 | 0.281728 | 0.404279 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 375.0 |
4 | -0.149945 | -0.154712 | -0.150748 | -0.133831 | -0.100650 | -0.050338 | 0.014458 | 0.089919 | 0.174723 | 0.270248 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 366.0 |
5 rows × 548 columns
# Display the last rows of the converted training split.
train_gdf.tail()
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_2_174 | dim_2_175 | dim_2_176 | dim_2_177 | dim_2_178 | dim_2_179 | dim_2_180 | dim_2_181 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1132 | -0.067803 | -0.013844 | 0.067091 | 0.175923 | 0.311758 | 0.473402 | 0.659267 | 0.866019 | 1.087547 | 1.315279 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 19.0 | 1351.0 |
1133 | -0.394238 | -0.475264 | -0.536898 | -0.578857 | -0.607846 | -0.630126 | -0.648238 | -0.662181 | -0.671709 | -0.677333 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11.0 | 828.0 |
1134 | -0.102479 | -0.110677 | -0.127336 | -0.150269 | -0.177369 | -0.207631 | -0.240525 | -0.275543 | -0.312820 | -0.353628 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 57.0 |
1135 | 0.143576 | 0.299206 | 0.488988 | 0.704623 | 0.937382 | 1.179030 | 1.421854 | 1.658054 | 1.878826 | 2.073959 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 19.0 | 1385.0 |
1136 | -0.374473 | -0.465755 | -0.545642 | -0.610448 | -0.665223 | -0.717764 | -0.773475 | -0.833931 | -0.898080 | -0.963991 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10.0 | 779.0 |
5 rows × 548 columns
%%time
train_gdf['case_id'].nunique().compute(), train_gdf['class_vals'].nunique().compute()
CPU times: user 384 ms, sys: 20.5 ms, total: 405 ms Wall time: 473 ms
(1137, 20)
%%time
# Sanity check: read back the parquet output written for the validation split
# and display its first rows.
import dask_cudf
valid_gdf = dask_cudf.read_parquet(output_valid_dir)
valid_gdf.head()
CPU times: user 311 ms, sys: 17 ms, total: 328 ms Wall time: 436 ms
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_2_174 | dim_2_175 | dim_2_176 | dim_2_177 | dim_2_178 | dim_2_179 | dim_2_180 | dim_2_181 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -0.067818 | -0.110913 | -0.200297 | -0.329535 | -0.484393 | -0.652534 | -0.828739 | -1.013061 | -1.205244 | -1.400698 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 17.0 | 1247.0 |
1 | -0.155226 | -0.174237 | -0.192516 | -0.207862 | -0.222582 | -0.241315 | -0.266942 | -0.299259 | -0.336715 | -0.377834 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 15.0 | 1110.0 |
2 | -0.197612 | -0.200153 | -0.181484 | -0.140233 | -0.077249 | 0.006616 | 0.109389 | 0.226975 | 0.355045 | 0.491321 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 351.0 |
3 | -0.156387 | -0.163468 | -0.159924 | -0.142182 | -0.108299 | -0.055478 | 0.019574 | 0.118058 | 0.238554 | 0.378835 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 19.0 | 1395.0 |
4 | -0.298142 | -0.325184 | -0.321029 | -0.287726 | -0.234856 | -0.171204 | -0.100279 | -0.021632 | 0.065202 | 0.158812 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 381.0 |
5 rows × 548 columns
# Display the last rows of the converted validation split.
valid_gdf.tail()
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_2_174 | dim_2_175 | dim_2_176 | dim_2_177 | dim_2_178 | dim_2_179 | dim_2_180 | dim_2_181 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
280 | -0.168708 | -0.191889 | -0.217918 | -0.246967 | -0.280014 | -0.317423 | -0.357981 | -0.398777 | -0.435821 | -0.465152 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 15.0 | 1073.0 |
281 | -0.254506 | -0.270490 | -0.259972 | -0.226300 | -0.179451 | -0.127260 | -0.070444 | -0.004125 | 0.077391 | 0.177068 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 365.0 |
282 | -0.133279 | -0.144468 | -0.161016 | -0.181991 | -0.205754 | -0.230874 | -0.256796 | -0.284144 | -0.314486 | -0.349780 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 6.0 | 506.0 |
283 | -0.095838 | -0.094679 | -0.095861 | -0.098774 | -0.105749 | -0.121057 | -0.147979 | -0.187354 | -0.238501 | -0.301091 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 14.0 | 1034.0 |
284 | -0.154512 | -0.181448 | -0.217386 | -0.259778 | -0.306061 | -0.354698 | -0.404998 | -0.456535 | -0.508682 | -0.560353 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11.0 | 864.0 |
5 rows × 548 columns
%%time
valid_gdf['case_id'].nunique().compute(), valid_gdf['class_vals'].nunique().compute()
CPU times: user 50.1 ms, sys: 16.5 ms, total: 66.6 ms Wall time: 144 ms
(285, 20)
%%time
# Sanity check: read back the parquet output written for the test split
# and display its first rows.
import dask_cudf
test_gdf = dask_cudf.read_parquet(output_test_dir)
test_gdf.head()
CPU times: user 317 ms, sys: 13.5 ms, total: 330 ms Wall time: 490 ms
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_2_174 | dim_2_175 | dim_2_176 | dim_2_177 | dim_2_178 | dim_2_179 | dim_2_180 | dim_2_181 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -0.011902 | -0.019550 | -0.062037 | -0.133200 | -0.221028 | -0.317045 | -0.419692 | -0.530973 | -0.651434 | -0.777891 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 5.0 | 444.0 |
1 | -0.121770 | -0.119831 | -0.116177 | -0.111483 | -0.108602 | -0.111719 | -0.123581 | -0.143425 | -0.167864 | -0.193229 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16.0 | 1199.0 |
2 | -0.122890 | -0.123181 | -0.124221 | -0.127167 | -0.133875 | -0.146406 | -0.166203 | -0.193641 | -0.228231 | -0.269213 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 14.0 | 1021.0 |
3 | -0.238751 | -0.286259 | -0.330241 | -0.366631 | -0.395599 | -0.419454 | -0.439938 | -0.457389 | -0.471751 | -0.483653 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 124.0 |
4 | -0.121683 | -0.119392 | -0.114387 | -0.105583 | -0.092747 | -0.076566 | -0.058176 | -0.038983 | -0.020820 | -0.005637 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16.0 | 1140.0 |
5 rows × 548 columns
# Display the last rows of the converted test split.
test_gdf.tail()
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_2_174 | dim_2_175 | dim_2_176 | dim_2_177 | dim_2_178 | dim_2_179 | dim_2_180 | dim_2_181 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1431 | -0.229912 | -0.261109 | -0.277499 | -0.273361 | -0.248630 | -0.207103 | -0.152437 | -0.084686 | -0.000471 | 0.103468 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 380.0 |
1432 | -0.080856 | -0.130531 | -0.227994 | -0.364505 | -0.520037 | -0.674525 | -0.817338 | -0.948717 | -1.073356 | -1.192411 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 17.0 | 1266.0 |
1433 | -0.150089 | -0.170816 | -0.193861 | -0.213184 | -0.224774 | -0.229679 | -0.233106 | -0.240481 | -0.254193 | -0.273944 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 14.0 | 1033.0 |
1434 | -0.187826 | -0.219704 | -0.255131 | -0.291466 | -0.327528 | -0.363795 | -0.401146 | -0.439741 | -0.479128 | -0.519153 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 |
1435 | -0.122832 | -0.122929 | -0.123384 | -0.125062 | -0.129972 | -0.141473 | -0.163379 | -0.198216 | -0.246522 | -0.308311 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 14.0 | 1048.0 |
5 rows × 548 columns
%%time
test_gdf['case_id'].nunique().compute(), test_gdf['class_vals'].nunique().compute()
CPU times: user 60.5 ms, sys: 4.62 ms, total: 65.1 ms Wall time: 145 ms
(1436, 20)
Shut down the Dask client and cluster (in place of resetting the kernel)
%%time
client.shutdown()
client.close()
Traceback (most recent call last): File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/utils.py", line 778, in wrapper return await func(*args, **kwargs) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1211, in _reconnect await self._ensure_connected(timeout=timeout) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1241, in _ensure_connected comm = await connect( File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/comm/core.py", line 315, in connect await asyncio.sleep(backoff) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/asyncio/tasks.py", line 659, in sleep return await future asyncio.exceptions.CancelledError Traceback (most recent call last): File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/utils.py", line 778, in wrapper return await func(*args, **kwargs) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1400, in _handle_report await self._reconnect() File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/utils.py", line 778, in wrapper return await func(*args, **kwargs) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1211, in _reconnect await self._ensure_connected(timeout=timeout) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1241, in _ensure_connected comm = await connect( File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/comm/core.py", line 315, in connect await asyncio.sleep(backoff) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/asyncio/tasks.py", line 659, in sleep 
return await future asyncio.exceptions.CancelledError
CPU times: user 38.7 ms, sys: 7.77 ms, total: 46.5 ms Wall time: 617 ms
# Run nbdev's export step so the cells marked for export are written out to the
# library module.
from nbdev import nbdev_export
nbdev_export()