#| default_exp feature_preprocessing.spoken_arabic_digits.tabular_to_timeseries
%load_ext autoreload
%autoreload 2
# declare a list of tasks whose products you want to use as inputs
upstream = ['feature_preprocessing_spoken_arabic_digits']
# Parameters
upstream = {"feature_preprocessing_spoken_arabic_digits": {"nb": "/home/ubuntu/vitmtsc_nbdev/output/204_feature_preprocessing.spoken_arabic_digits.target_encoding.html", "SpokenArabicDigits_TRAIN_TE": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/target_encoding/train", "SpokenArabicDigits_VALID_TE": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/target_encoding/valid", "SpokenArabicDigits_TEST_TE": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/target_encoding/test", "SpokenArabicDigits_workflow_dir": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/target_encoding/nvtabular_workflow"}}
product = {"nb": "/home/ubuntu/vitmtsc_nbdev/output/304_feature_preprocessing.spoken_arabic_digits.tabular_to_timeseries.html", "SpokenArabicDigits_TRAIN_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/target_encoding-nn/train", "SpokenArabicDigits_VALID_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/target_encoding-nn/valid", "SpokenArabicDigits_TEST_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/target_encoding-nn/test"}
#| hide
from nbdev.showdoc import *
#| export
from vitmtsc import *
from vitmtsc.core import *
from vitmtsc.data.spoken_arabic_digits import *
from vitmtsc.feature_preprocessing.spoken_arabic_digits.target_encoding import *
import os
import glob
#| export
# Task parameters pinned into the exported module; they repeat the values of the
# injected "# Parameters" cell.
# NOTE(review): every path is absolute and machine-specific (/home/ubuntu/...), so the
# exported module only resolves them on a host with the same layout.
# `upstream` maps the upstream task name to the products it wrote.
upstream = {
"feature_preprocessing_spoken_arabic_digits": {
"nb": "/home/ubuntu/vitmtsc_nbdev/output/204_feature_preprocessing.spoken_arabic_digits.target_encoding.html",
"SpokenArabicDigits_TRAIN_TE": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/target_encoding/train",
"SpokenArabicDigits_VALID_TE": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/target_encoding/valid",
"SpokenArabicDigits_TEST_TE": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/target_encoding/test",
"SpokenArabicDigits_workflow_dir": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/target_encoding/nvtabular_workflow",
}
}
# `product` lists the locations this notebook writes to (rendered notebook + one
# directory per data split).
product = {
"nb": "/home/ubuntu/vitmtsc_nbdev/output/304_feature_preprocessing.spoken_arabic_digits.tabular_to_timeseries.html",
"SpokenArabicDigits_TRAIN_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/target_encoding-nn/train",
"SpokenArabicDigits_VALID_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/target_encoding-nn/valid",
"SpokenArabicDigits_TEST_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/target_encoding-nn/test",
}
Convert target-encoded data from tabular to time-series format
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
# Start a local Dask-CUDA cluster and attach a client to it; the captured run
# shows 4 workers, each on a Tesla T4.
# NOTE(review): rmm_pool_size='20GB' is larger than the 14.76 GiB reported per GPU;
# presumably this relies on rmm_managed_memory=True -- confirm it is intentional.
cluster = LocalCUDACluster(
    memory_limit='auto',
    device_memory_limit=0.5,
    rmm_pool_size='20GB',
    rmm_managed_memory=True,
)
client = Client(cluster)
client
2022-09-23 19:02:38,430 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize 2022-09-23 19:02:38,430 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize 2022-09-23 19:02:38,432 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize 2022-09-23 19:02:38,432 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize 2022-09-23 19:02:38,451 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize 2022-09-23 19:02:38,452 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize 2022-09-23 19:02:38,452 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize 2022-09-23 19:02:38,452 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize
Client-4b25feba-3b72-11ed-810f-061eae6df733
Connection method: Cluster object | Cluster type: dask_cuda.LocalCUDACluster |
Dashboard: http://127.0.0.1:8787/status |
35b4d850
Dashboard: http://127.0.0.1:8787/status | Workers: 4 |
Total threads: 4 | Total memory: 150.00 GiB |
Status: running | Using processes: True |
Scheduler-dc2817a0-292b-407c-9b0c-c02159126607
Comm: tcp://127.0.0.1:41897 | Workers: 4 |
Dashboard: http://127.0.0.1:8787/status | Total threads: 4 |
Started: Just now | Total memory: 150.00 GiB |
Comm: tcp://127.0.0.1:42887 | Total threads: 1 |
Dashboard: http://127.0.0.1:33551/status | Memory: 37.50 GiB |
Nanny: tcp://127.0.0.1:37017 | |
Local directory: /tmp/dask-worker-space/worker-94g9qh2n | |
GPU: Tesla T4 | GPU memory: 14.76 GiB |
Comm: tcp://127.0.0.1:41907 | Total threads: 1 |
Dashboard: http://127.0.0.1:40857/status | Memory: 37.50 GiB |
Nanny: tcp://127.0.0.1:33807 | |
Local directory: /tmp/dask-worker-space/worker-50rz32l0 | |
GPU: Tesla T4 | GPU memory: 14.76 GiB |
Comm: tcp://127.0.0.1:42281 | Total threads: 1 |
Dashboard: http://127.0.0.1:40467/status | Memory: 37.50 GiB |
Nanny: tcp://127.0.0.1:37151 | |
Local directory: /tmp/dask-worker-space/worker-t7bn6z0x | |
GPU: Tesla T4 | GPU memory: 14.76 GiB |
Comm: tcp://127.0.0.1:34383 | Total threads: 1 |
Dashboard: http://127.0.0.1:40589/status | Memory: 37.50 GiB |
Nanny: tcp://127.0.0.1:41889 | |
Local directory: /tmp/dask-worker-space/worker-kdcmpxq_ | |
GPU: Tesla T4 | GPU memory: 14.76 GiB |
#| export
# Dataset identifier used in the upstream/product keys and output paths.
DATASET_NAME = 'SpokenArabicDigits'
# Time steps per flattened series: the converted frames carry columns
# dim_<k>_0 .. dim_<k>_92 for every feature k.
SEQUENCE_LENGTH = 93
# Number of feature channels; matches the 13 entries of MTSC_COLUMN_NAMES.
NUMBER_OF_FEATURES = 13
# Number of distinct class_vals labels (the nunique checks in this notebook report 10).
NUM_TARGET = 10
Convert from Tabular to Time-Series Format
#| export
# Names of the 13 feature columns of the tabular input: dim_0 .. dim_12.
MTSC_COLUMN_NAMES = [f'dim_{feature_idx}' for feature_idx in range(13)]
#| export
# Column order of the tabular input: case_id, case_id_seq, reading_id,
# the dim_* feature columns, then class_vals.
ALL_COLUMNS = ['case_id', 'case_id_seq', 'reading_id', *MTSC_COLUMN_NAMES, 'class_vals']
Input Data Location
# Input directories: the target-encoded train/valid/test products of the upstream task.
# os.path.join discards the "./" prefix whenever the second component is absolute
# (as the paths configured in this notebook are), so each variable then holds the
# product path unchanged; the prefix only matters for relative product paths.
target_encoded_train_dir = os.path.join("./", upstream['feature_preprocessing_spoken_arabic_digits']['SpokenArabicDigits_TRAIN_TE'])
target_encoded_valid_dir = os.path.join("./", upstream['feature_preprocessing_spoken_arabic_digits']['SpokenArabicDigits_VALID_TE'])
target_encoded_test_dir = os.path.join("./", upstream['feature_preprocessing_spoken_arabic_digits']['SpokenArabicDigits_TEST_TE'])
Output Data Location
# Output directories for the converted train/valid/test splits, taken from `product`.
output_train_dir = os.path.join("./", product['SpokenArabicDigits_TRAIN_MODEL_INPUT'])
output_valid_dir = os.path.join("./", product['SpokenArabicDigits_VALID_MODEL_INPUT'])
output_test_dir = os.path.join("./", product['SpokenArabicDigits_TEST_MODEL_INPUT'])
# Create the directories with the standard library instead of shelling out to
# `mkdir -p`: this also works outside IPython, on hosts without a POSIX shell,
# and for paths that contain spaces. exist_ok=True keeps reruns idempotent.
os.makedirs(output_train_dir, exist_ok=True)
os.makedirs(output_valid_dir, exist_ok=True)
os.makedirs(output_test_dir, exist_ok=True)
Tabular to Time-Series format conversion (training split)
%%time
# Flatten the target-encoded training split into one row per case and write the
# result as parquet under output_train_dir.
convert_from_tabular_to_timeseries_format(input_dir = target_encoded_train_dir,
output_dir = output_train_dir,
all_columns = ALL_COLUMNS,
mtsc_column_names = MTSC_COLUMN_NAMES,
chunk_size_processing = 50000,
number_of_features = NUMBER_OF_FEATURES,
seq_len = SEQUENCE_LENGTH,
chunk_size_file = 10000)
case_id_seq_min: 0 case_id_seq_max: 6598 Total number of chunks to be processed: 1 Started processing chunk: 0 with case_id_seq from : 0 to 6598 Before CumCount Min: 0 CumCount Max: 92 After CumCount Min: 0 CumCount Max: 92 sorted flattened_gdf.shape: (5279, 1211) Total number of files to be created: 1 Writing to output file: /home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/target_encoding-nn/train/chunk_0_part_0.parquet with records from iloc: 0 to 5279 Finished processing chunk: 0 with case_id_seq from : 0 to 6598 CPU times: user 4.51 s, sys: 892 ms, total: 5.4 s Wall time: 7.06 s
Tabular to Time-Series format conversion (validation split)
%%time
# Flatten the target-encoded validation split into one row per case and write the
# result as parquet under output_valid_dir.
convert_from_tabular_to_timeseries_format(input_dir = target_encoded_valid_dir,
output_dir = output_valid_dir,
all_columns = ALL_COLUMNS,
mtsc_column_names = MTSC_COLUMN_NAMES,
chunk_size_processing = 50000,
number_of_features = NUMBER_OF_FEATURES,
seq_len = SEQUENCE_LENGTH,
chunk_size_file = 10000)
case_id_seq_min: 8 case_id_seq_max: 6595 Total number of chunks to be processed: 1 Started processing chunk: 0 with case_id_seq from : 0 to 6595 Before CumCount Min: 0 CumCount Max: 76 After CumCount Min: 0 CumCount Max: 76 sorted flattened_gdf.shape: (1320, 1211) Total number of files to be created: 1 Writing to output file: /home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/target_encoding-nn/valid/chunk_0_part_0.parquet with records from iloc: 0 to 1320 Finished processing chunk: 0 with case_id_seq from : 0 to 6595 CPU times: user 1.78 s, sys: 125 ms, total: 1.9 s Wall time: 1.89 s
Tabular to Time-Series format conversion (test split)
%%time
# Flatten the target-encoded test split into one row per case and write the
# result as parquet under output_test_dir.
convert_from_tabular_to_timeseries_format(input_dir = target_encoded_test_dir,
output_dir = output_test_dir,
all_columns = ALL_COLUMNS,
mtsc_column_names = MTSC_COLUMN_NAMES,
chunk_size_processing = 50000,
number_of_features = NUMBER_OF_FEATURES,
seq_len = SEQUENCE_LENGTH,
chunk_size_file = 10000)
case_id_seq_min: 0 case_id_seq_max: 2198 Total number of chunks to be processed: 1 Started processing chunk: 0 with case_id_seq from : 0 to 2198 Before CumCount Min: 0 CumCount Max: 82 After CumCount Min: 0 CumCount Max: 82 sorted flattened_gdf.shape: (2199, 1211) Total number of files to be created: 1 Writing to output file: /home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/target_encoding-nn/test/chunk_0_part_0.parquet with records from iloc: 0 to 2199 Finished processing chunk: 0 with case_id_seq from : 0 to 2198 CPU times: user 1.93 s, sys: 97.8 ms, total: 2.03 s Wall time: 2.02 s
%%time
import dask_cudf
# Read the converted training split back from disk to inspect what was written.
# NOTE(review): trailing time-step columns show 0.0 in the preview -- presumably
# zero padding for series shorter than SEQUENCE_LENGTH; confirm in the converter.
train_gdf = dask_cudf.read_parquet(output_train_dir)
train_gdf.head()
CPU times: user 809 ms, sys: 24.1 ms, total: 833 ms Wall time: 1.24 s
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_12_85 | dim_12_86 | dim_12_87 | dim_12_88 | dim_12_89 | dim_12_90 | dim_12_91 | dim_12_92 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -0.705349 | -0.676248 | -0.481761 | 0.351920 | 0.710952 | 0.828561 | 0.857438 | 0.752634 | 0.668000 | 0.454983 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 5.0 | 3637.0 |
1 | -0.821576 | -0.363190 | 0.656355 | 0.902892 | 0.831862 | 0.248096 | -0.345719 | -0.368327 | -0.648289 | -1.208962 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 9.0 | 6493.0 |
2 | -1.843552 | -0.735614 | -0.399746 | -0.307773 | -0.494479 | -0.854084 | -1.578477 | -1.882803 | -1.911788 | -2.076630 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 9.0 | 6307.0 |
3 | -1.037652 | -1.579965 | -1.640039 | -1.515646 | -0.288510 | -0.498640 | 0.836760 | 1.456875 | 1.705517 | 1.783802 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 7.0 | 5177.0 |
4 | -1.733125 | -1.259967 | -1.498995 | -1.329074 | -1.471497 | 0.461549 | 0.796674 | 0.746721 | 0.565119 | -0.050897 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 461.0 |
5 rows × 1211 columns
# Peek at the last rows of the converted training split.
train_gdf.tail()
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_12_85 | dim_12_86 | dim_12_87 | dim_12_88 | dim_12_89 | dim_12_90 | dim_12_91 | dim_12_92 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
5274 | 0.087934 | -0.035516 | -0.382685 | -1.170508 | -1.799076 | -2.442917 | -2.497768 | -2.384257 | -2.690325 | -2.847802 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 1751.0 |
5275 | 0.824571 | 1.260763 | 1.494095 | 1.537519 | 1.528268 | 1.452377 | 1.388385 | 0.903110 | -0.408188 | -0.677572 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 7.0 | 5218.0 |
5276 | -0.863636 | -0.497661 | 0.107814 | 0.167308 | 0.287456 | 0.088152 | 0.010447 | -0.335924 | -1.055275 | -1.233158 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 1947.0 |
5277 | 0.059275 | 0.324459 | 0.554309 | 0.547089 | 0.484911 | -0.135749 | -1.022089 | -1.019996 | -0.593947 | -0.337811 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 | 2597.0 |
5278 | -0.888249 | -0.722669 | -0.060329 | -0.327145 | -0.062506 | 0.183451 | 0.254118 | 0.646488 | 0.907862 | 0.123268 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 32.0 |
5 rows × 1211 columns
%%time
# Sanity check: the number of distinct case_id values should equal the number of
# rows written for this split, and every class label should be represented.
train_gdf['case_id'].nunique().compute(), train_gdf['class_vals'].nunique().compute()
CPU times: user 292 ms, sys: 17.8 ms, total: 310 ms Wall time: 494 ms
(5279, 10)
%%time
import dask_cudf
# Read the converted validation split back from disk to inspect what was written.
valid_gdf = dask_cudf.read_parquet(output_valid_dir)
valid_gdf.head()
CPU times: user 797 ms, sys: 13.3 ms, total: 810 ms Wall time: 1.15 s
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_12_85 | dim_12_86 | dim_12_87 | dim_12_88 | dim_12_89 | dim_12_90 | dim_12_91 | dim_12_92 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.200573 | 0.283212 | 0.297541 | 0.265944 | 0.347821 | 0.192302 | -0.301897 | -0.550777 | -1.370430 | -2.280433 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 9.0 | 6086.0 |
1 | -0.865841 | -0.665789 | -0.638426 | -0.576530 | -0.572445 | -0.161578 | 0.138903 | 0.017158 | -0.060619 | -0.262790 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 5.0 | 3789.0 |
2 | -0.229815 | -0.260214 | -0.232209 | 0.075963 | -0.297543 | -1.327841 | -2.046811 | -2.403956 | -2.293674 | -2.761609 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 1611.0 |
3 | -0.352611 | -0.751726 | -0.683649 | -0.543681 | -0.659662 | -0.777170 | -0.580970 | -1.163942 | -0.308862 | 0.422950 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 5.0 | 3820.0 |
4 | -1.541185 | -0.139957 | 0.700359 | 0.663683 | 0.674784 | 0.528661 | 0.051113 | -0.363676 | -0.335416 | -0.362994 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 7.0 | 5223.0 |
5 rows × 1211 columns
# Peek at the last rows of the converted validation split.
valid_gdf.tail()
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_12_85 | dim_12_86 | dim_12_87 | dim_12_88 | dim_12_89 | dim_12_90 | dim_12_91 | dim_12_92 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1315 | -0.666631 | -1.335967 | -1.152261 | -0.650702 | -0.440474 | -0.458442 | -0.563372 | -0.595688 | -1.090228 | -2.103330 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 5.0 | 3887.0 |
1316 | -1.366439 | -0.477531 | -0.284955 | -0.188785 | -0.271061 | -0.854062 | -1.657488 | -1.495548 | -1.724854 | -2.230226 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 9.0 | 6453.0 |
1317 | -0.471226 | -0.009288 | 0.455853 | 0.247987 | -0.485475 | -1.721371 | -2.095603 | -2.535024 | -2.326577 | -2.510464 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 1446.0 |
1318 | -1.332629 | -0.935489 | -1.161548 | -0.495270 | -0.044295 | -0.235655 | -0.231483 | -0.328923 | 0.613694 | 1.201051 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 414.0 |
1319 | 0.576909 | 0.862553 | 0.695171 | 0.587466 | 0.798742 | 1.016656 | 0.817352 | 0.083581 | 0.388705 | 0.676090 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 3245.0 |
5 rows × 1211 columns
%%time
# Sanity check: the number of distinct case_id values should equal the number of
# rows written for this split, and every class label should be represented.
valid_gdf['case_id'].nunique().compute(), valid_gdf['class_vals'].nunique().compute()
CPU times: user 96.3 ms, sys: 9.68 ms, total: 106 ms Wall time: 235 ms
(1320, 10)
%%time
import dask_cudf
# Read the converted test split back from disk to inspect what was written.
test_gdf = dask_cudf.read_parquet(output_test_dir)
test_gdf.head()
CPU times: user 938 ms, sys: 22.2 ms, total: 960 ms Wall time: 1.36 s
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_12_85 | dim_12_86 | dim_12_87 | dim_12_88 | dim_12_89 | dim_12_90 | dim_12_91 | dim_12_92 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.205761 | 0.459590 | 0.492747 | 0.413518 | 0.268592 | -0.043787 | -0.522721 | -0.375763 | -0.397856 | -0.381107 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 8.0 | 1872.0 |
1 | -1.111704 | -0.809006 | -0.181929 | -0.087501 | 0.044547 | 0.059420 | -0.277047 | -0.644651 | -0.587928 | -0.650847 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 9.0 | 2009.0 |
2 | -1.270777 | -0.803706 | -0.167128 | -0.098275 | 0.057824 | 0.117318 | 0.034426 | -0.050897 | -0.254047 | -0.917401 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 8.0 | 1789.0 |
3 | -1.295591 | -1.663873 | -1.462754 | -1.160460 | -0.947468 | -0.655846 | 0.572447 | 0.842491 | 0.868683 | 0.710879 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 6.0 | 1361.0 |
4 | 1.732071 | 1.950349 | 1.898183 | 1.832558 | 1.805096 | 1.797805 | 1.675951 | 0.738885 | 1.313618 | 1.533383 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 1008.0 |
5 rows × 1211 columns
# Peek at the last rows of the converted test split.
test_gdf.tail()
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_12_85 | dim_12_86 | dim_12_87 | dim_12_88 | dim_12_89 | dim_12_90 | dim_12_91 | dim_12_92 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2194 | 0.358087 | 0.792466 | 0.748752 | 0.792284 | 0.786262 | 0.693865 | 0.583983 | 0.375536 | 0.384533 | 0.744943 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 8.0 | 1864.0 |
2195 | -1.224343 | -1.422850 | -0.844097 | -1.372534 | -1.045012 | -1.210667 | -1.260874 | -1.396041 | -0.050317 | 0.254698 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 937.0 |
2196 | -1.703741 | -0.348730 | 0.959520 | 1.348952 | 1.589721 | 1.679796 | 1.569696 | 1.136225 | 0.413083 | 0.518358 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 8.0 | 1763.0 |
2197 | -0.291957 | 0.608651 | 1.291344 | 1.694851 | 1.810465 | 1.788373 | 1.760549 | 1.797224 | 1.713135 | 1.579745 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 886.0 |
2198 | -0.692561 | -1.180811 | -0.899234 | -0.496757 | -0.770147 | -1.035283 | -1.505960 | -0.502308 | 0.451319 | 0.360627 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 5.0 | 1152.0 |
5 rows × 1211 columns
%%time
# Sanity check: the number of distinct case_id values should equal the number of
# rows written for this split, and every class label should be represented.
test_gdf['case_id'].nunique().compute(), test_gdf['class_vals'].nunique().compute()
CPU times: user 94.7 ms, sys: 6.77 ms, total: 101 ms Wall time: 234 ms
(2199, 10)
Shut down the Dask client and cluster (this releases the GPU workers, much like resetting the kernel)
%%time
client.shutdown()
client.close()
Traceback (most recent call last): File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/utils.py", line 778, in wrapper return await func(*args, **kwargs) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1211, in _reconnect await self._ensure_connected(timeout=timeout) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1241, in _ensure_connected comm = await connect( File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/comm/core.py", line 315, in connect await asyncio.sleep(backoff) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/asyncio/tasks.py", line 659, in sleep return await future asyncio.exceptions.CancelledError Traceback (most recent call last): File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/utils.py", line 778, in wrapper return await func(*args, **kwargs) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1400, in _handle_report await self._reconnect() File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/utils.py", line 778, in wrapper return await func(*args, **kwargs) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1211, in _reconnect await self._ensure_connected(timeout=timeout) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1241, in _ensure_connected comm = await connect( File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/comm/core.py", line 315, in connect await asyncio.sleep(backoff) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/asyncio/tasks.py", line 659, in sleep 
return await future asyncio.exceptions.CancelledError
CPU times: user 25.7 ms, sys: 13.1 ms, total: 38.8 ms Wall time: 618 ms
from nbdev import nbdev_export
# Write the cells marked `#| export` out to the library module named by `#| default_exp`.
nbdev_export()