#| default_exp feature_preprocessing.face_detection.tabular_to_timeseries
# Enable IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell executes, so library changes are picked up without a restart.
%load_ext autoreload
%autoreload 2
# Declare the list of tasks whose products this notebook uses as inputs.
# NOTE(review): looks like the Ploomber placeholder form — `upstream` is rebound
# to a dict of concrete product paths immediately below.
upstream = ['feature_preprocessing_face_detection']
# Parameters
# Concrete values for this run: `upstream` maps the upstream task name to the
# paths of its products, and `product` lists the paths this notebook writes.
upstream = {"feature_preprocessing_face_detection": {"nb": "/home/ubuntu/vitmtsc_nbdev/output/201_feature_preprocessing.face_detection.target_encoding.html", "FaceDetection_TRAIN_TE": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding/train", "FaceDetection_VALID_TE": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding/valid", "FaceDetection_TEST_TE": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding/test", "FaceDetection_workflow_dir": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding/nvtabular_workflow"}}
product = {"nb": "/home/ubuntu/vitmtsc_nbdev/output/301_feature_preprocessing.face_detection.tabular_to_timeseries.html", "FaceDetection_TRAIN_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding-nn/train", "FaceDetection_VALID_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding-nn/valid", "FaceDetection_TEST_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding-nn/test"}
#| hide
from nbdev.showdoc import *
#| export
from vitmtsc import *
from vitmtsc.core import *
from vitmtsc.data.face_detection import *
from vitmtsc.feature_preprocessing.face_detection.target_encoding import *
import os
import glob
#| export
# Products of the upstream target-encoding task, keyed by task name. These
# values are baked into the exported module, so they must point at the
# directories the upstream task actually wrote.
upstream = {
    "feature_preprocessing_face_detection": {
        # Rendered report of the upstream notebook.
        "nb": "/home/ubuntu/vitmtsc_nbdev/output/201_feature_preprocessing.face_detection.target_encoding.html",
        # Target-encoded tabular data, one directory per split.
        "FaceDetection_TRAIN_TE": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding/train",
        "FaceDetection_VALID_TE": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding/valid",
        "FaceDetection_TEST_TE": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding/test",
        # Workflow directory saved by the upstream task.
        "FaceDetection_workflow_dir": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding/nvtabular_workflow",
    }
}

# Outputs of this notebook: its rendered report plus one directory of
# time-series formatted model input per split.
product = {
    "nb": "/home/ubuntu/vitmtsc_nbdev/output/301_feature_preprocessing.face_detection.tabular_to_timeseries.html",
    "FaceDetection_TRAIN_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding-nn/train",
    "FaceDetection_VALID_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding-nn/valid",
    "FaceDetection_TEST_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding-nn/test",
}
Convert target-encoded data from tabular to time-series format
from dask.distributed import Client
from dask_cuda import LocalCUDACluster

# Start a local Dask cluster backed by the machine's GPUs and attach a client
# to it; subsequent dask / dask_cudf work in this notebook runs on this cluster.
cluster = LocalCUDACluster(
    memory_limit='auto',        # host-memory limit per worker, chosen by Dask
    device_memory_limit=0.5,    # fraction of GPU memory used as the device-memory limit
    rmm_pool_size='20GB',       # size of the RMM memory pool created on each worker
    rmm_managed_memory=True,    # back RMM allocations with CUDA managed memory
)
client = Client(cluster)

# Trailing expression: renders the client / cluster summary in the notebook.
client
2022-09-23 18:56:39,926 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize 2022-09-23 18:56:39,926 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize 2022-09-23 18:56:39,926 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize 2022-09-23 18:56:39,926 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize 2022-09-23 18:56:39,926 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize 2022-09-23 18:56:39,926 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize 2022-09-23 18:56:39,947 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize 2022-09-23 18:56:39,947 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize
Client-757926df-3b71-11ed-812f-0a01290f6f4b
Connection method: Cluster object | Cluster type: dask_cuda.LocalCUDACluster |
Dashboard: http://127.0.0.1:8787/status |
ad3b8e5f
Dashboard: http://127.0.0.1:8787/status | Workers: 4 |
Total threads: 4 | Total memory: 150.00 GiB |
Status: running | Using processes: True |
Scheduler-2cdea13b-ae78-48b9-8900-faf9610ec3b4
Comm: tcp://127.0.0.1:44691 | Workers: 4 |
Dashboard: http://127.0.0.1:8787/status | Total threads: 4 |
Started: Just now | Total memory: 150.00 GiB |
Comm: tcp://127.0.0.1:42929 | Total threads: 1 |
Dashboard: http://127.0.0.1:38577/status | Memory: 37.50 GiB |
Nanny: tcp://127.0.0.1:46671 | |
Local directory: /tmp/dask-worker-space/worker-om6la5d8 | |
GPU: Tesla T4 | GPU memory: 14.76 GiB |
Comm: tcp://127.0.0.1:38175 | Total threads: 1 |
Dashboard: http://127.0.0.1:36947/status | Memory: 37.50 GiB |
Nanny: tcp://127.0.0.1:41129 | |
Local directory: /tmp/dask-worker-space/worker-y93igzpe | |
GPU: Tesla T4 | GPU memory: 14.76 GiB |
Comm: tcp://127.0.0.1:43017 | Total threads: 1 |
Dashboard: http://127.0.0.1:38615/status | Memory: 37.50 GiB |
Nanny: tcp://127.0.0.1:38925 | |
Local directory: /tmp/dask-worker-space/worker-ho0h83c4 | |
GPU: Tesla T4 | GPU memory: 14.76 GiB |
Comm: tcp://127.0.0.1:45957 | Total threads: 1 |
Dashboard: http://127.0.0.1:35493/status | Memory: 37.50 GiB |
Nanny: tcp://127.0.0.1:34417 | |
Local directory: /tmp/dask-worker-space/worker-qdu8xfmu | |
GPU: Tesla T4 | GPU memory: 14.76 GiB |
#| export
# Dataset identifier; matches the "FaceDetection" component of the input and
# output paths configured for this notebook.
DATASET_NAME = 'FaceDetection'
# Number of time steps per case: the flattened output columns carry the
# per-step suffixes _0 .. _61.
SEQUENCE_LENGTH = 62
# Number of channels per time step: dim_0 .. dim_143 (see MTSC_COLUMN_NAMES).
NUMBER_OF_FEATURES = 144
# Number of target classes. Not referenced by the other cells of this notebook;
# presumably consumed by downstream modules — TODO confirm.
NUM_TARGET = 2
Convert from Tabular to Time-Series Format
#| export
# Names of the per-channel feature columns in the tabular input, in order:
# 'dim_0', 'dim_1', ..., 'dim_143'.
# Generated instead of hand-listed so the sequence cannot contain typos, gaps
# or duplicates; 144 is the channel count declared as NUMBER_OF_FEATURES.
MTSC_COLUMN_NAMES = [f'dim_{channel}' for channel in range(144)]
#| export
# Full column list of the tabular input: the three id columns first, then
# every per-channel feature column, and the class label last.
ALL_COLUMNS = ['case_id', 'case_id_seq', 'reading_id', *MTSC_COLUMN_NAMES, 'class_vals']
Input Data Location
# Directories holding the target-encoded train / valid / test splits written by
# the upstream task. os.path.join("./", p) returns p unchanged when p is
# absolute, so the "./" prefix only takes effect for relative paths.
target_encoded_train_dir, target_encoded_valid_dir, target_encoded_test_dir = (
    os.path.join("./", upstream['feature_preprocessing_face_detection'][te_key])
    for te_key in ('FaceDetection_TRAIN_TE', 'FaceDetection_VALID_TE', 'FaceDetection_TEST_TE')
)
Output Data Location
# Destination directories for the time-series formatted parquet files, one per
# split. os.path.join("./", p) returns p unchanged when p is absolute, so the
# "./" prefix only takes effect for relative product paths.
output_train_dir = os.path.join("./", product['FaceDetection_TRAIN_MODEL_INPUT'])
output_valid_dir = os.path.join("./", product['FaceDetection_VALID_MODEL_INPUT'])
output_test_dir = os.path.join("./", product['FaceDetection_TEST_MODEL_INPUT'])

# Create each directory together with any missing parents; an existing
# directory is not an error. os.makedirs replaces the `!mkdir -p $var` shell
# escapes: it needs no shell, copes with spaces or shell metacharacters in the
# path, and raises on failure instead of printing an error and carrying on.
os.makedirs(output_train_dir, exist_ok=True)
os.makedirs(output_valid_dir, exist_ok=True)
os.makedirs(output_test_dir, exist_ok=True)
Tabular to Time-Series format conversion (training split)
%%time
# Convert the target-encoded training split and write the result as parquet
# under output_train_dir.
# NOTE(review): judging by the helper's log output, chunk_size_processing
# bounds the case_id_seq range handled per processing chunk and
# chunk_size_file bounds the rows written per parquet file — confirm against
# the helper's definition.
convert_from_tabular_to_timeseries_format(
    input_dir=target_encoded_train_dir,
    output_dir=output_train_dir,
    all_columns=ALL_COLUMNS,
    mtsc_column_names=MTSC_COLUMN_NAMES,
    chunk_size_processing=50000,
    number_of_features=NUMBER_OF_FEATURES,
    seq_len=SEQUENCE_LENGTH,
    chunk_size_file=10000,
)
case_id_seq_min: 0 case_id_seq_max: 5889 Total number of chunks to be processed: 1 Started processing chunk: 0 with case_id_seq from : 0 to 5889 Before CumCount Min: 0 CumCount Max: 61 After CumCount Min: 0 CumCount Max: 61 sorted flattened_gdf.shape: (4712, 8930) Total number of files to be created: 1 Writing to output file: /home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding-nn/train/chunk_0_part_0.parquet with records from iloc: 0 to 4712 Finished processing chunk: 0 with case_id_seq from : 0 to 5889 CPU times: user 15.5 s, sys: 2.82 s, total: 18.3 s Wall time: 19.7 s
Tabular to Time-Series format conversion (validation split)
%%time
# Convert the target-encoded validation split and write the result as parquet
# under output_valid_dir, using the same settings as the training split.
# NOTE(review): judging by the helper's log output, chunk_size_processing
# bounds the case_id_seq range handled per processing chunk and
# chunk_size_file bounds the rows written per parquet file — confirm against
# the helper's definition.
convert_from_tabular_to_timeseries_format(
    input_dir=target_encoded_valid_dir,
    output_dir=output_valid_dir,
    all_columns=ALL_COLUMNS,
    mtsc_column_names=MTSC_COLUMN_NAMES,
    chunk_size_processing=50000,
    number_of_features=NUMBER_OF_FEATURES,
    seq_len=SEQUENCE_LENGTH,
    chunk_size_file=10000,
)
case_id_seq_min: 8 case_id_seq_max: 5882 Total number of chunks to be processed: 1 Started processing chunk: 0 with case_id_seq from : 0 to 5882 Before CumCount Min: 0 CumCount Max: 61 After CumCount Min: 0 CumCount Max: 61 sorted flattened_gdf.shape: (1178, 8930) Total number of files to be created: 1 Writing to output file: /home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding-nn/valid/chunk_0_part_0.parquet with records from iloc: 0 to 1178 Finished processing chunk: 0 with case_id_seq from : 0 to 5882 CPU times: user 10.8 s, sys: 770 ms, total: 11.6 s Wall time: 11.4 s
Tabular to Time-Series format conversion (test split)
%%time
# Convert the target-encoded test split and write the result as parquet under
# output_test_dir, using the same settings as the training split.
# NOTE(review): judging by the helper's log output, chunk_size_processing
# bounds the case_id_seq range handled per processing chunk and
# chunk_size_file bounds the rows written per parquet file — confirm against
# the helper's definition.
convert_from_tabular_to_timeseries_format(
    input_dir=target_encoded_test_dir,
    output_dir=output_test_dir,
    all_columns=ALL_COLUMNS,
    mtsc_column_names=MTSC_COLUMN_NAMES,
    chunk_size_processing=50000,
    number_of_features=NUMBER_OF_FEATURES,
    seq_len=SEQUENCE_LENGTH,
    chunk_size_file=10000,
)
case_id_seq_min: 0 case_id_seq_max: 3523 Total number of chunks to be processed: 1 Started processing chunk: 0 with case_id_seq from : 0 to 3523 Before CumCount Min: 0 CumCount Max: 61 After CumCount Min: 0 CumCount Max: 61 sorted flattened_gdf.shape: (3524, 8930) Total number of files to be created: 1 Writing to output file: /home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding-nn/test/chunk_0_part_0.parquet with records from iloc: 0 to 3524 Finished processing chunk: 0 with case_id_seq from : 0 to 3523 CPU times: user 12.7 s, sys: 1.6 s, total: 14.3 s Wall time: 14 s
%%time
# Sanity check: read the training parquet output back as a dask_cudf DataFrame
# and preview its first rows.
import dask_cudf
train_gdf = dask_cudf.read_parquet(output_train_dir)
train_gdf.head()
CPU times: user 18 s, sys: 396 ms, total: 18.4 s Wall time: 30.9 s
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_143_54 | dim_143_55 | dim_143_56 | dim_143_57 | dim_143_58 | dim_143_59 | dim_143_60 | dim_143_61 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -3.611579 | -3.544451 | -3.310208 | -2.981573 | -2.707916 | -4.318740 | -2.681417 | -1.143323 | -0.371899 | -0.398802 | ... | 0.098988 | 0.111700 | 0.059702 | -0.353686 | -0.669344 | -0.701798 | -0.503293 | 0.011390 | 1.0 | 5703.0 |
1 | 1.504849 | 1.429360 | 1.372945 | 1.154656 | 0.879269 | 1.387374 | 2.662556 | 0.951379 | 1.861210 | 0.875889 | ... | 1.051566 | 1.135207 | 1.283814 | -0.551966 | -0.409467 | -2.146049 | -1.793830 | -0.727230 | 0.0 | 4245.0 |
2 | 0.597107 | 0.689412 | 0.784829 | 0.793919 | 0.767928 | -0.301940 | 1.133886 | 0.802337 | 2.655175 | 2.248975 | ... | 0.894139 | 0.352814 | -0.889328 | -0.906947 | -1.318142 | -0.732796 | -0.446137 | -0.211485 | 0.0 | 1386.0 |
3 | 0.131422 | 0.697414 | 0.952692 | 1.163141 | 1.346406 | -1.309643 | -1.616391 | -1.709870 | -1.079094 | -1.121214 | ... | -0.869001 | -0.232134 | -0.238141 | -0.025023 | -0.333906 | -0.146325 | 0.264351 | 0.253558 | 1.0 | 4911.0 |
4 | -0.680714 | -0.349697 | -0.305056 | -0.227676 | -0.145361 | -1.335729 | -1.568024 | -1.274372 | -0.817652 | 0.180512 | ... | 0.395892 | 0.026665 | 1.564071 | 1.091476 | 2.065342 | 1.780000 | 0.580152 | 0.762950 | 1.0 | 2948.0 |
5 rows × 8930 columns
# Preview the last rows of the training output as well.
train_gdf.tail()
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_143_54 | dim_143_55 | dim_143_56 | dim_143_57 | dim_143_58 | dim_143_59 | dim_143_60 | dim_143_61 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
4707 | -1.330701 | -1.291110 | -1.334657 | -1.295054 | -1.169601 | -0.375712 | 0.176117 | -0.824844 | -0.215864 | -1.207686 | ... | 0.250639 | 0.414858 | 0.710168 | 1.343488 | 0.567356 | -1.193091 | -1.807238 | -1.130595 | 1.0 | 1988.0 |
4708 | 0.336049 | 0.747271 | 0.967525 | 1.086845 | 1.141900 | 0.723601 | 0.356979 | 1.530178 | 0.421692 | 0.884415 | ... | 0.441511 | -0.191611 | 0.073925 | -1.173424 | -0.542870 | 0.317379 | 0.692330 | 0.577826 | 0.0 | 4626.0 |
4709 | 1.063434 | 1.186315 | 1.225160 | 1.331084 | 1.327598 | 0.170167 | 0.987125 | 1.853559 | 0.186814 | 2.500128 | ... | -1.672389 | -1.376272 | -1.362003 | -1.295241 | -0.243934 | -0.899911 | -0.228563 | -0.566318 | 0.0 | 709.0 |
4710 | 0.530625 | 0.251153 | 0.030042 | -0.115527 | -0.133783 | 1.220124 | -0.449545 | 0.319548 | -0.101689 | 0.795397 | ... | 0.035429 | -0.201459 | -0.208152 | -0.226692 | 0.061203 | 0.429655 | -0.420488 | -0.550902 | 1.0 | 3741.0 |
4711 | -0.909796 | -1.171535 | -1.365407 | -1.534692 | -1.710724 | 1.384764 | -1.055296 | 1.287799 | 0.147919 | -0.362669 | ... | -0.290226 | 0.181056 | 0.565160 | 0.028504 | -0.661667 | -1.072682 | -0.651636 | -0.356642 | 0.0 | 670.0 |
5 rows × 8930 columns
%%time
# Count the distinct case ids and distinct class labels in the training output
# (each .compute() call triggers its own Dask computation).
train_gdf['case_id'].nunique().compute(), train_gdf['class_vals'].nunique().compute()
CPU times: user 888 ms, sys: 19.4 ms, total: 907 ms Wall time: 2.05 s
(4712, 2)
%%time
# Sanity check: read the validation parquet output back as a dask_cudf
# DataFrame and preview its first rows.
import dask_cudf
valid_gdf = dask_cudf.read_parquet(output_valid_dir)
valid_gdf.head()
CPU times: user 17.4 s, sys: 408 ms, total: 17.8 s Wall time: 30.1 s
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_143_54 | dim_143_55 | dim_143_56 | dim_143_57 | dim_143_58 | dim_143_59 | dim_143_60 | dim_143_61 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -0.929148 | -1.105110 | -0.941891 | -0.649398 | -0.245340 | -0.266881 | -1.239319 | -1.406907 | -0.599465 | 0.242002 | ... | -0.343859 | 0.338954 | 1.288810 | 1.278083 | 0.619133 | -0.555734 | 0.162212 | 0.371525 | 0.0 | 3784.0 |
1 | 0.381624 | 0.886542 | 0.973097 | 0.997448 | 1.014284 | 1.556347 | 1.160265 | 1.130376 | 1.257697 | 0.260258 | ... | -0.385063 | -1.367192 | 0.094255 | 1.124542 | 0.822106 | 0.694545 | 0.525326 | 0.327642 | 1.0 | 408.0 |
2 | 1.926687 | 1.510785 | 1.395330 | 1.163647 | 1.046519 | 1.039880 | 0.538547 | 1.490433 | 1.576309 | 2.003257 | ... | 0.695202 | 0.031622 | 0.095980 | -0.190589 | 0.258013 | 0.135613 | -0.412100 | -1.018441 | 0.0 | 5582.0 |
3 | 0.367114 | 0.651509 | 0.432148 | 0.233512 | -0.000091 | -0.081989 | -0.074007 | -1.006657 | -0.841989 | -1.933067 | ... | 0.020846 | 1.013557 | 0.071996 | 0.453203 | 1.324450 | 0.724536 | -0.134212 | 0.593665 | 0.0 | 465.0 |
4 | -1.538407 | -1.339643 | -1.162910 | -0.972035 | -0.799651 | -1.043930 | -0.969960 | 0.559300 | 0.073164 | -0.181563 | ... | 0.073362 | -0.110095 | -0.213497 | 0.238001 | -0.612707 | 0.354244 | 0.139645 | 0.560081 | 0.0 | 4169.0 |
5 rows × 8930 columns
# Preview the last rows of the validation output as well.
valid_gdf.tail()
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_143_54 | dim_143_55 | dim_143_56 | dim_143_57 | dim_143_58 | dim_143_59 | dim_143_60 | dim_143_61 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1173 | 0.071042 | -0.024387 | 0.191721 | 0.128114 | -0.059758 | 0.268023 | 1.476222 | 0.959334 | -1.141774 | -0.423020 | ... | -0.365797 | 0.908630 | 1.685662 | 0.574225 | 0.349738 | -0.549095 | -0.288867 | 0.711257 | 1.0 | 1561.0 |
1174 | 0.252586 | 0.439646 | 0.425299 | 0.419486 | 0.267317 | 0.321941 | -0.448104 | 1.129621 | -1.374113 | -0.475453 | ... | -0.415012 | -1.096173 | -1.425451 | -1.266408 | 0.624598 | 0.273851 | -1.239975 | -0.912561 | 0.0 | 624.0 |
1175 | -0.581236 | -0.831223 | -0.900761 | -1.045076 | -1.251502 | 0.073992 | 1.158753 | 1.319034 | 1.447029 | 0.329836 | ... | -1.761261 | -2.998023 | -3.599784 | -2.669970 | -1.778380 | -0.402851 | 0.004090 | 0.624326 | 1.0 | 486.0 |
1176 | 0.561493 | 0.582066 | 0.659096 | 0.682265 | 0.696228 | 1.799372 | 1.108655 | 0.715644 | -0.280438 | 0.629797 | ... | -0.952442 | -0.806641 | -1.839325 | -1.666076 | -0.240264 | -0.200910 | -0.022648 | -1.224240 | 0.0 | 3880.0 |
1177 | -0.242705 | -0.013153 | 0.180737 | 0.394410 | 0.638280 | 1.529123 | 1.484270 | 0.606758 | 1.107706 | 0.441574 | ... | 0.104662 | -1.271352 | -0.533765 | -2.233684 | -1.331086 | 0.250216 | 1.427544 | 2.279614 | 1.0 | 3185.0 |
5 rows × 8930 columns
%%time
# Count the distinct case ids and distinct class labels in the validation
# output (each .compute() call triggers its own Dask computation).
valid_gdf['case_id'].nunique().compute(), valid_gdf['class_vals'].nunique().compute()
CPU times: user 726 ms, sys: 28.9 ms, total: 754 ms Wall time: 1.89 s
(1178, 2)
%%time
# Sanity check: read the test parquet output back as a dask_cudf DataFrame and
# preview its first rows.
import dask_cudf
test_gdf = dask_cudf.read_parquet(output_test_dir)
test_gdf.head()
CPU times: user 17.4 s, sys: 371 ms, total: 17.8 s Wall time: 30.1 s
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_143_54 | dim_143_55 | dim_143_56 | dim_143_57 | dim_143_58 | dim_143_59 | dim_143_60 | dim_143_61 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.358228 | 0.074760 | 0.015374 | -0.193739 | -0.368179 | -0.071744 | 0.554529 | -0.196741 | -0.677700 | -1.545661 | ... | -0.339607 | 0.555091 | 1.180807 | 1.288541 | 0.139732 | 0.738019 | -0.025770 | 1.075838 | 1.0 | 2793.0 |
1 | -1.387512 | -1.752615 | -1.849856 | -1.727745 | -1.521146 | -0.242973 | -1.734590 | 0.044742 | 0.785843 | 0.681541 | ... | 1.111590 | 2.149374 | 1.126483 | -0.585017 | -1.489166 | -1.763359 | 0.069810 | -0.063899 | 0.0 | 2157.0 |
2 | 1.273599 | 0.823874 | 0.738863 | 0.651619 | 0.682204 | -0.476438 | -0.353018 | -0.815121 | -0.947192 | -0.574885 | ... | 1.704488 | 0.166966 | 0.080120 | -0.266172 | 0.150620 | 0.891987 | 0.869114 | 1.031004 | 1.0 | 965.0 |
3 | -0.269798 | -0.370553 | -0.266308 | -0.204556 | -0.154651 | -0.892622 | -0.494802 | -1.803072 | -0.659148 | -0.408984 | ... | 0.626399 | 0.286209 | -0.111414 | 0.044262 | 0.644858 | 1.219852 | -0.716698 | -0.796233 | 1.0 | 1632.0 |
4 | 1.010976 | 1.183456 | 1.082183 | 0.931184 | 0.827308 | -0.039847 | -0.226516 | 0.412190 | 0.006674 | 0.045594 | ... | -0.919426 | -0.862778 | -1.319405 | -0.519904 | 0.079618 | 0.406579 | 0.364146 | -0.687530 | 0.0 | 540.0 |
5 rows × 8930 columns
# Preview the last rows of the test output as well.
test_gdf.tail()
dim_0_0 | dim_0_1 | dim_0_2 | dim_0_3 | dim_0_4 | dim_0_5 | dim_0_6 | dim_0_7 | dim_0_8 | dim_0_9 | ... | dim_143_54 | dim_143_55 | dim_143_56 | dim_143_57 | dim_143_58 | dim_143_59 | dim_143_60 | dim_143_61 | class_vals | case_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3519 | 0.879537 | 0.816978 | 0.660403 | 0.435763 | 0.061187 | -0.143533 | -0.494938 | -0.653089 | -0.428259 | -1.971869 | ... | 0.215659 | 0.369807 | 0.352191 | 0.522829 | 1.076980 | 0.811910 | 0.887828 | -0.632244 | 0.0 | 1926.0 |
3520 | -1.016368 | -0.598267 | -0.361403 | -0.220654 | -0.074354 | 0.493791 | 0.625948 | 0.201896 | -0.443691 | -0.448058 | ... | -1.010442 | -0.686450 | -0.054556 | 1.387704 | 0.192816 | -1.033519 | -0.917726 | -1.895749 | 0.0 | 1862.0 |
3521 | 0.123616 | -0.188700 | -0.440103 | -0.636104 | -0.794950 | 1.500530 | 1.338156 | 0.543613 | 1.295190 | -0.085536 | ... | 0.862444 | 1.219391 | 1.160300 | 0.478101 | 0.961551 | 0.176353 | 1.199245 | 0.511172 | 1.0 | 2977.0 |
3522 | 0.386057 | 0.349258 | 0.125756 | -0.070403 | -0.297548 | 0.588159 | -0.016820 | -0.194569 | 0.454931 | -0.762533 | ... | -0.526063 | 0.062500 | -0.139767 | 0.778894 | 1.089592 | -0.019971 | 0.732896 | 0.461898 | 0.0 | 1747.0 |
3523 | -0.388013 | -0.748928 | -1.110766 | -1.399547 | -1.547126 | -0.374099 | 0.621944 | -0.450650 | 0.271023 | -0.436169 | ... | 1.113185 | 1.324051 | 0.893113 | 0.358821 | 0.468017 | 0.255657 | 0.184054 | 0.001536 | 1.0 | 3039.0 |
5 rows × 8930 columns
%%time
# Count the distinct case ids and distinct class labels in the test output
# (each .compute() call triggers its own Dask computation).
test_gdf['case_id'].nunique().compute(), test_gdf['class_vals'].nunique().compute()
CPU times: user 777 ms, sys: 37.1 ms, total: 814 ms Wall time: 1.97 s
(3524, 2)
Shut down the Dask cluster and client to release the GPU workers.
%%time
# Disconnect the client first, then stop the scheduler and GPU workers by
# closing the cluster object created earlier in this notebook.
# The previous order (client.shutdown() followed by client.close()) stopped the
# cluster while the client was still attached; the client then appears to have
# attempted a reconnect that client.close() cancelled, which logged
# asyncio CancelledError tracebacks on every run.
client.close()
cluster.close()
Traceback (most recent call last): File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/utils.py", line 778, in wrapper return await func(*args, **kwargs) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1211, in _reconnect await self._ensure_connected(timeout=timeout) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1241, in _ensure_connected comm = await connect( File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/comm/core.py", line 315, in connect await asyncio.sleep(backoff) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/asyncio/tasks.py", line 659, in sleep return await future asyncio.exceptions.CancelledError Traceback (most recent call last): File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/utils.py", line 778, in wrapper return await func(*args, **kwargs) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1400, in _handle_report await self._reconnect() File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/utils.py", line 778, in wrapper return await func(*args, **kwargs) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1211, in _reconnect await self._ensure_connected(timeout=timeout) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1241, in _ensure_connected comm = await connect( File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/comm/core.py", line 315, in connect await asyncio.sleep(backoff) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/asyncio/tasks.py", line 659, in sleep 
return await future asyncio.exceptions.CancelledError
CPU times: user 38.5 ms, sys: 5.57 ms, total: 44 ms Wall time: 668 ms
# Run nbdev's export step, which writes cells marked `#| export` into the
# Python module(s) named by the notebooks' `#| default_exp` directives.
from nbdev import nbdev_export
nbdev_export()