#| default_exp data.spoken_arabic_digits
%load_ext autoreload
%autoreload 2
# declare a list tasks whose products you want to use as inputs
upstream = ['core']
# Parameters
upstream = {"core": {"nb": "/home/ubuntu/vitmtsc_nbdev/output/00_core.html", "FaceDetection_TRAIN_TS": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/ts/train/FaceDetection_TRAIN.ts", "FaceDetection_TEST_TS": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/ts/test/FaceDetection_TEST.ts", "InsectWingbeat_TRAIN_TS": "/home/ubuntu/vitmtsc_nbdev/output/InsectWingbeat/ts/train/InsectWingbeat_TRAIN.ts", "InsectWingbeat_TEST_TS": "/home/ubuntu/vitmtsc_nbdev/output/InsectWingbeat/ts/test/InsectWingbeat_TEST.ts", "PenDigits_TRAIN_TS": "/home/ubuntu/vitmtsc_nbdev/output/PenDigits/ts/train/PenDigits_TRAIN.ts", "PenDigits_TEST_TS": "/home/ubuntu/vitmtsc_nbdev/output/PenDigits/ts/test/PenDigits_TEST.ts", "SpokenArabicDigits_TRAIN_TS": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/ts/train/SpokenArabicDigits_TRAIN.ts", "SpokenArabicDigits_TEST_TS": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/ts/test/SpokenArabicDigits_TEST.ts", "CharacterTrajectories_TRAIN_TS": "/home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/ts/train/CharacterTrajectories_TRAIN.ts", "CharacterTrajectories_TEST_TS": "/home/ubuntu/vitmtsc_nbdev/output/CharacterTrajectories/ts/test/CharacterTrajectories_TEST.ts"}}
product = {"nb": "/home/ubuntu/vitmtsc_nbdev/output/104_data.spoken_arabic_digits.html", "SpokenArabicDigits_TRAIN_RAW": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/raw/train", "SpokenArabicDigits_VALID_RAW": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/raw/valid", "SpokenArabicDigits_TEST_RAW": "/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/raw/test"}
#| hide
from nbdev.showdoc import *
#| export
from vitmtsc import *
from vitmtsc.core import *
import dask_cudf
import gc #garbage collector interface
#| export
upstream = {
    "core": {
        "nb": "/home/ubuntu/vitmtsc_nbdev/output/00_core.html",
        # Each UEA dataset contributes a TRAIN and a TEST .ts file under
        # output/<dataset>/ts/<split>/<dataset>_<SPLIT>.ts
        **{
            f"{ds}_{split}_TS": (
                f"/home/ubuntu/vitmtsc_nbdev/output/{ds}/ts/{split.lower()}/{ds}_{split}.ts"
            )
            for ds in (
                "FaceDetection",
                "InsectWingbeat",
                "PenDigits",
                "SpokenArabicDigits",
                "CharacterTrajectories",
            )
            for split in ("TRAIN", "TEST")
        },
    }
}
product = {
    "nb": "/home/ubuntu/vitmtsc_nbdev/output/104_data.spoken_arabic_digits.html",
    # Raw parquet output directories for the three splits written below.
    **{
        f"SpokenArabicDigits_{split}_RAW": (
            f"/home/ubuntu/vitmtsc_nbdev/output/SpokenArabicDigits/raw/{split.lower()}"
        )
        for split in ("TRAIN", "VALID", "TEST")
    },
}
Convert the dataset to Parquet format so that target encoding can be run on it.
#| export
DATASET_NAME = 'SpokenArabicDigits'  # UEA MTSC dataset processed by this notebook
%%time
# Load the TRAIN split from the sktime .ts file into a flat (long) DataFrame:
# one row per (case_id, reading_id) with dim_* columns plus class_vals.
# Helper is defined in 00_core (the 'core' upstream task).
train = get_mtsc_data_tabular_from_ts(upstream['core']['SpokenArabicDigits_TRAIN_TS'])
train.shape
Reading dataset TS file... Converting _x to tabular format... Converting _y to tabular format... Merging _x and _y... CPU times: user 23.9 s, sys: 251 ms, total: 24.1 s Wall time: 24.1 s
(263224, 16)
train.head()
case_id | reading_id | dim_0 | dim_1 | dim_2 | dim_3 | dim_4 | dim_5 | dim_6 | dim_7 | dim_8 | dim_9 | dim_10 | dim_11 | dim_12 | class_vals | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | -0.81101 | -7.2382 | 1.5429 | -0.64774 | 1.42710 | 0.61356 | 0.365160 | 0.088906 | 0.47031 | 0.98844 | 0.044692 | 0.20817 | 0.51140 | 1 |
1 | 0 | 1 | -0.37028 | -7.1336 | 1.8856 | -0.34316 | 0.96733 | 0.32763 | 0.429880 | 0.504790 | 0.41533 | 0.28804 | 0.086109 | 0.62690 | 0.78115 | 1 |
2 | 0 | 2 | 0.59659 | -8.3059 | 1.6943 | -0.66611 | 0.34967 | -0.17425 | 0.820770 | 1.261100 | 0.41653 | 0.50050 | 0.571630 | 0.45316 | 0.64465 | 1 |
3 | 0 | 3 | 1.45850 | -8.1957 | 1.8454 | -1.14960 | 0.82660 | -0.51313 | 0.067443 | 0.256370 | 0.11500 | -0.10915 | 0.085991 | 0.69064 | 0.33769 | 1 |
4 | 0 | 4 | 2.08240 | -8.6670 | 1.1995 | -1.12400 | 1.24450 | -0.10251 | 0.998670 | 0.571740 | 1.03840 | 0.17564 | -0.032857 | 0.53229 | 0.32941 | 1 |
train['reading_id'].min(), train['reading_id'].max()
(0, 92)
train['class_vals'].unique()
array(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'], dtype=object)
%%time
# Load the TEST split the same way as TRAIN above.
test = get_mtsc_data_tabular_from_ts(upstream['core']['SpokenArabicDigits_TEST_TS'])
test.shape
Reading dataset TS file... Converting _x to tabular format... Converting _y to tabular format... Merging _x and _y... CPU times: user 7.79 s, sys: 45.6 ms, total: 7.83 s Wall time: 7.81 s
(87029, 16)
test.head()
case_id | reading_id | dim_0 | dim_1 | dim_2 | dim_3 | dim_4 | dim_5 | dim_6 | dim_7 | dim_8 | dim_9 | dim_10 | dim_11 | dim_12 | class_vals | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 1.2572 | -8.2449 | 0.8483 | -1.5782 | 0.4736 | -0.063273 | 0.424810 | 0.50017 | 0.70420 | 0.28973 | 0.076053 | 0.025883 | -0.22968 | 1 |
1 | 0 | 1 | 3.3638 | -9.0154 | 1.4104 | -1.5884 | 1.3725 | -0.334810 | 1.052900 | 0.89804 | 0.79525 | 0.74112 | -0.153510 | 0.517180 | 0.44204 | 1 |
2 | 0 | 2 | 3.4461 | -9.4871 | 1.3425 | -1.4066 | 1.4422 | 0.124470 | 0.581990 | 0.88984 | 0.90260 | 0.11521 | -0.047091 | 0.409890 | 0.81545 | 1 |
3 | 0 | 3 | 4.7730 | -10.0070 | 1.2143 | -2.0118 | 2.0864 | 0.285620 | 0.528680 | 0.33971 | 1.16880 | 0.42569 | -0.470990 | 0.508600 | -0.33061 | 1 |
4 | 0 | 4 | 5.3317 | -9.6834 | 1.5131 | -2.3545 | 1.6933 | 0.066311 | -0.088666 | 0.16826 | 0.24546 | -0.34749 | -0.098748 | 0.810930 | -0.12837 | 1 |
test['reading_id'].min(), test['reading_id'].max()
(0, 82)
test['class_vals'].unique()
array(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'], dtype=object)
from sklearn.model_selection import train_test_split
# One row per case: split at the *case* level so all readings (time steps)
# of a case land in the same fold.
X = train[['case_id', 'class_vals']].drop_duplicates()
# NOTE(review): y_train / y_val are never used afterwards, and the split is
# not stratified (no stratify=); the per-class counts shown below are only
# approximately balanced — confirm this is intentional.
X_train, X_val, y_train, y_val = train_test_split(X, X['class_vals'], train_size=0.8, random_state = 42)
X_train.case_id.nunique(), X_val.case_id.nunique()
(5279, 1320)
X_train.groupby(by = ['class_vals'], dropna = False).count()
case_id | |
---|---|
class_vals | |
1 | 508 |
10 | 538 |
2 | 542 |
3 | 513 |
4 | 546 |
5 | 537 |
6 | 524 |
7 | 538 |
8 | 521 |
9 | 512 |
X_val.groupby(by = ['class_vals'], dropna = False).count()
case_id | |
---|---|
class_vals | |
1 | 152 |
10 | 121 |
2 | 118 |
3 | 147 |
4 | 114 |
5 | 123 |
6 | 136 |
7 | 122 |
8 | 139 |
9 | 148 |
test[['case_id', 'class_vals']].drop_duplicates().groupby(by = ['class_vals'], dropna = False).count()
case_id | |
---|---|
class_vals | |
1 | 220 |
10 | 219 |
2 | 220 |
3 | 220 |
4 | 220 |
5 | 220 |
6 | 220 |
7 | 220 |
8 | 220 |
9 | 220 |
# Build the validation frame: keep only readings whose case_id is in X_val.
# The merge duplicates class_vals (suffix _x from `train`, _y from X_val);
# the two copies are identical, so keep one under the original name.
valid = train.merge(X_val, on=['case_id'], how='inner')
valid['class_vals'] = valid['class_vals_x']
valid = valid.drop(columns=['class_vals_x','class_vals_y'])
valid.case_id.nunique()
1320
# Same for the training frame, restricted to X_train's cases.
# Note this rebinds `train`, dropping the validation cases from it.
train = train.merge(X_train, on=['case_id'], how='inner')
train['class_vals'] = train['class_vals_x']
train = train.drop(columns=['class_vals_x','class_vals_y'])
train.case_id.nunique()
5279
# Sanity check: disjoint case counts per split.
train.case_id.nunique(), valid.case_id.nunique(), test.case_id.nunique()
(5279, 1320, 2199)
train
case_id | reading_id | dim_0 | dim_1 | dim_2 | dim_3 | dim_4 | dim_5 | dim_6 | dim_7 | dim_8 | dim_9 | dim_10 | dim_11 | dim_12 | class_vals | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | -0.81101 | -7.2382 | 1.5429 | -0.64774 | 1.427100 | 0.61356 | 0.365160 | 0.088906 | 0.47031 | 0.98844 | 0.044692 | 0.20817 | 0.51140 | 1 |
1 | 0 | 1 | -0.37028 | -7.1336 | 1.8856 | -0.34316 | 0.967330 | 0.32763 | 0.429880 | 0.504790 | 0.41533 | 0.28804 | 0.086109 | 0.62690 | 0.78115 | 1 |
2 | 0 | 2 | 0.59659 | -8.3059 | 1.6943 | -0.66611 | 0.349670 | -0.17425 | 0.820770 | 1.261100 | 0.41653 | 0.50050 | 0.571630 | 0.45316 | 0.64465 | 1 |
3 | 0 | 3 | 1.45850 | -8.1957 | 1.8454 | -1.14960 | 0.826600 | -0.51313 | 0.067443 | 0.256370 | 0.11500 | -0.10915 | 0.085991 | 0.69064 | 0.33769 | 1 |
4 | 0 | 4 | 2.08240 | -8.6670 | 1.1995 | -1.12400 | 1.244500 | -0.10251 | 0.998670 | 0.571740 | 1.03840 | 0.17564 | -0.032857 | 0.53229 | 0.32941 | 1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
210689 | 6598 | 27 | 5.70890 | -5.4820 | -5.4035 | -2.53300 | -0.406580 | -0.37694 | -1.354600 | -0.740230 | -1.67430 | -0.45117 | -0.947710 | 0.52619 | -0.54065 | 10 |
210690 | 6598 | 28 | 5.07440 | -4.3477 | -4.1986 | -2.45780 | 0.275810 | -0.15892 | -0.655600 | -0.635580 | -1.19440 | -0.59201 | -0.852560 | 0.59287 | -0.50689 | 10 |
210691 | 6598 | 29 | 4.31890 | -3.6047 | -3.7113 | -2.83280 | 0.213910 | -0.18953 | -0.636860 | -0.940760 | -1.10920 | -0.75260 | -0.643950 | 0.58321 | -0.73842 | 10 |
210692 | 6598 | 30 | 2.72680 | -3.7538 | -2.5978 | -3.42900 | -0.067911 | -0.41058 | -0.692010 | -1.525700 | -1.24440 | -0.79156 | -0.885680 | -0.10524 | -0.59349 | 10 |
210693 | 6598 | 31 | 1.21410 | -3.9318 | -1.6229 | -1.84440 | -0.190920 | -1.07420 | -1.255600 | -0.923660 | -0.92309 | -1.03250 | -0.521680 | 0.24364 | -0.07214 | 10 |
210694 rows × 16 columns
#| export
import cudf
import dask_cudf
import pandas as pd
def write_parquet(pandas_df, output_dir, npartitions = 2):
    """Encode class labels and write `pandas_df` as GPU-backed parquet.

    Maps the string labels '1'..'10' to integer codes 0..9 (the caller's
    DataFrame is modified in place, matching the original behavior), copies
    the frame to GPU memory, adds a `case_id_seq` helper column, and writes
    the result to `output_dir` with dask_cudf.

    Parameters
    ----------
    pandas_df : pandas.DataFrame
        Tabular MTSC data with a string `class_vals` column ('1'..'10').
    output_dir : str
        Destination directory for the parquet files.
    npartitions : int, default 2
        Number of dask partitions to write.
    """
    # FIX: the original used pandas_df['class_vals'].replace(..., inplace=True),
    # a chained in-place call on a column selection that is deprecated and
    # silently ineffective under pandas copy-on-write. Assigning the result
    # back keeps the same in-place-on-the-frame semantics reliably.
    labels = [str(i) for i in range(1, 11)]  # '1' .. '10'
    pandas_df['class_vals'] = pandas_df['class_vals'].replace(labels, list(range(10)))
    gdf = cudf.from_pandas(pandas_df)
    # Duplicate the id so downstream steps keep a copy after any re-indexing.
    gdf['case_id_seq'] = gdf['case_id']
    dask_gdf = dask_cudf.from_cudf(gdf, npartitions = npartitions)
    dask_gdf.to_parquet(output_dir)
import time
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
# One worker per visible GPU. rmm_pool_size pre-allocates a 20GB RMM pool of
# managed (unified) memory per worker; device_memory_limit=0.2 starts spilling
# to host once 20% of device memory is in use.
cluster = LocalCUDACluster(memory_limit='auto', device_memory_limit=0.2, rmm_pool_size='20GB', rmm_managed_memory=True)
client = Client(cluster)
client
2022-09-23 19:00:53,424 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize 2022-09-23 19:00:53,424 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize 2022-09-23 19:00:53,451 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize 2022-09-23 19:00:53,451 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize 2022-09-23 19:00:53,484 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize 2022-09-23 19:00:53,484 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize 2022-09-23 19:00:53,505 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize 2022-09-23 19:00:53,505 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize
Client-0ca00b4c-3b72-11ed-80d1-061eae6df733
Connection method: Cluster object | Cluster type: dask_cuda.LocalCUDACluster |
Dashboard: http://127.0.0.1:8787/status |
69eca070
Dashboard: http://127.0.0.1:8787/status | Workers: 4 |
Total threads: 4 | Total memory: 150.00 GiB |
Status: running | Using processes: True |
Scheduler-b3089c29-1bfc-48e4-8f0a-a0c27fb4fb75
Comm: tcp://127.0.0.1:41757 | Workers: 4 |
Dashboard: http://127.0.0.1:8787/status | Total threads: 4 |
Started: Just now | Total memory: 150.00 GiB |
Comm: tcp://127.0.0.1:40237 | Total threads: 1 |
Dashboard: http://127.0.0.1:46053/status | Memory: 37.50 GiB |
Nanny: tcp://127.0.0.1:44123 | |
Local directory: /tmp/dask-worker-space/worker-lwybrmor | |
GPU: Tesla T4 | GPU memory: 14.76 GiB |
Comm: tcp://127.0.0.1:34107 | Total threads: 1 |
Dashboard: http://127.0.0.1:32799/status | Memory: 37.50 GiB |
Nanny: tcp://127.0.0.1:42839 | |
Local directory: /tmp/dask-worker-space/worker-z1p8ljns | |
GPU: Tesla T4 | GPU memory: 14.76 GiB |
Comm: tcp://127.0.0.1:35937 | Total threads: 1 |
Dashboard: http://127.0.0.1:36879/status | Memory: 37.50 GiB |
Nanny: tcp://127.0.0.1:43947 | |
Local directory: /tmp/dask-worker-space/worker-01zwvuc6 | |
GPU: Tesla T4 | GPU memory: 14.76 GiB |
Comm: tcp://127.0.0.1:32819 | Total threads: 1 |
Dashboard: http://127.0.0.1:34691/status | Memory: 37.50 GiB |
Nanny: tcp://127.0.0.1:34687 | |
Local directory: /tmp/dask-worker-space/worker-hlf3s86e | |
GPU: Tesla T4 | GPU memory: 14.76 GiB |
Train Dataset
%%time
# Write the TRAIN split to parquet, then read it back to verify the output.
write_parquet(train, product['SpokenArabicDigits_TRAIN_RAW'])
CPU times: user 1.53 s, sys: 448 ms, total: 1.98 s Wall time: 3.7 s
train_gdf = dask_cudf.read_parquet(product['SpokenArabicDigits_TRAIN_RAW'])
train_gdf.head()
case_id | reading_id | dim_0 | dim_1 | dim_2 | dim_3 | dim_4 | dim_5 | dim_6 | dim_7 | dim_8 | dim_9 | dim_10 | dim_11 | dim_12 | class_vals | case_id_seq | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | -0.81101 | -7.2382 | 1.5429 | -0.64774 | 1.42710 | 0.61356 | 0.365160 | 0.088906 | 0.47031 | 0.98844 | 0.044692 | 0.20817 | 0.51140 | 0 | 0 |
1 | 0 | 1 | -0.37028 | -7.1336 | 1.8856 | -0.34316 | 0.96733 | 0.32763 | 0.429880 | 0.504790 | 0.41533 | 0.28804 | 0.086109 | 0.62690 | 0.78115 | 0 | 0 |
2 | 0 | 2 | 0.59659 | -8.3059 | 1.6943 | -0.66611 | 0.34967 | -0.17425 | 0.820770 | 1.261100 | 0.41653 | 0.50050 | 0.571630 | 0.45316 | 0.64465 | 0 | 0 |
3 | 0 | 3 | 1.45850 | -8.1957 | 1.8454 | -1.14960 | 0.82660 | -0.51313 | 0.067443 | 0.256370 | 0.11500 | -0.10915 | 0.085991 | 0.69064 | 0.33769 | 0 | 0 |
4 | 0 | 4 | 2.08240 | -8.6670 | 1.1995 | -1.12400 | 1.24450 | -0.10251 | 0.998670 | 0.571740 | 1.03840 | 0.17564 | -0.032857 | 0.53229 | 0.32941 | 0 | 0 |
train_gdf['reading_id'].min().compute(), train_gdf['reading_id'].max().compute()
(0, 92)
train_gdf.case_id.nunique().compute()
5279
Valid Dataset
%%time
# Write the VALID split to parquet, then read it back to verify the output.
write_parquet(valid, product['SpokenArabicDigits_VALID_RAW'])
CPU times: user 77.7 ms, sys: 6.76 ms, total: 84.4 ms Wall time: 158 ms
valid_gdf = dask_cudf.read_parquet(product['SpokenArabicDigits_VALID_RAW'])
valid_gdf.head()
case_id | reading_id | dim_0 | dim_1 | dim_2 | dim_3 | dim_4 | dim_5 | dim_6 | dim_7 | dim_8 | dim_9 | dim_10 | dim_11 | dim_12 | class_vals | case_id_seq | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 8 | 0 | -1.0892 | -8.7579 | 0.83393 | -0.33022 | 1.30250 | 1.197300 | 1.31660 | -0.105040 | 0.96489 | 0.575990 | 0.26403 | 0.92761 | 0.18037 | 0 | 8 |
1 | 8 | 1 | -3.3416 | -7.7889 | 1.50210 | 0.41732 | 1.42310 | 0.696770 | 0.88998 | 0.716740 | 0.75735 | 0.677830 | 0.77345 | 0.70858 | 0.70686 | 0 | 8 |
2 | 8 | 2 | -1.8662 | -5.2297 | 1.44100 | -0.37156 | 0.13840 | 0.176820 | 0.30345 | 0.072609 | 0.11270 | 0.179590 | 0.34612 | 0.33175 | 0.40493 | 0 | 8 |
3 | 8 | 3 | -2.3750 | -6.8519 | 1.44270 | -0.87851 | 1.29820 | 0.152430 | 0.45129 | 0.293990 | 0.33066 | -0.022868 | 0.13106 | 0.99790 | 0.34434 | 0 | 8 |
4 | 8 | 4 | -1.9728 | -4.1038 | 1.28990 | -0.42268 | 0.55998 | 0.095705 | 0.37113 | 0.304230 | 0.22817 | 0.104420 | 0.35641 | 0.25528 | 0.43961 | 0 | 8 |
valid_gdf['reading_id'].min().compute(), valid_gdf['reading_id'].max().compute()
(0, 76)
valid_gdf.case_id.nunique().compute()
1320
Test Dataset
%%time
# Write the TEST split to parquet, then read it back to verify the output.
write_parquet(test, product['SpokenArabicDigits_TEST_RAW'])
CPU times: user 98.9 ms, sys: 10.7 ms, total: 110 ms Wall time: 184 ms
test_gdf = dask_cudf.read_parquet(product['SpokenArabicDigits_TEST_RAW'])
test_gdf.head()
case_id | reading_id | dim_0 | dim_1 | dim_2 | dim_3 | dim_4 | dim_5 | dim_6 | dim_7 | dim_8 | dim_9 | dim_10 | dim_11 | dim_12 | class_vals | case_id_seq | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 1.2572 | -8.2449 | 0.8483 | -1.5782 | 0.4736 | -0.063273 | 0.424810 | 0.50017 | 0.70420 | 0.28973 | 0.076053 | 0.025883 | -0.22968 | 0 | 0 |
1 | 0 | 1 | 3.3638 | -9.0154 | 1.4104 | -1.5884 | 1.3725 | -0.334810 | 1.052900 | 0.89804 | 0.79525 | 0.74112 | -0.153510 | 0.517180 | 0.44204 | 0 | 0 |
2 | 0 | 2 | 3.4461 | -9.4871 | 1.3425 | -1.4066 | 1.4422 | 0.124470 | 0.581990 | 0.88984 | 0.90260 | 0.11521 | -0.047091 | 0.409890 | 0.81545 | 0 | 0 |
3 | 0 | 3 | 4.7730 | -10.0070 | 1.2143 | -2.0118 | 2.0864 | 0.285620 | 0.528680 | 0.33971 | 1.16880 | 0.42569 | -0.470990 | 0.508600 | -0.33061 | 0 | 0 |
4 | 0 | 4 | 5.3317 | -9.6834 | 1.5131 | -2.3545 | 1.6933 | 0.066311 | -0.088666 | 0.16826 | 0.24546 | -0.34749 | -0.098748 | 0.810930 | -0.12837 | 0 | 0 |
test_gdf['reading_id'].min().compute(), test_gdf['reading_id'].max().compute()
(0, 82)
test_gdf.case_id.nunique().compute()
2199
We shut down the Dask cluster and reset the kernel before the next stage.
%%time
client.shutdown()
client.close()
Traceback (most recent call last): File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/utils.py", line 778, in wrapper return await func(*args, **kwargs) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1211, in _reconnect await self._ensure_connected(timeout=timeout) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1241, in _ensure_connected comm = await connect( File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/comm/core.py", line 315, in connect await asyncio.sleep(backoff) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/asyncio/tasks.py", line 659, in sleep return await future asyncio.exceptions.CancelledError Traceback (most recent call last): File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/utils.py", line 778, in wrapper return await func(*args, **kwargs) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1400, in _handle_report await self._reconnect() File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/utils.py", line 778, in wrapper return await func(*args, **kwargs) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1211, in _reconnect await self._ensure_connected(timeout=timeout) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/client.py", line 1241, in _ensure_connected comm = await connect( File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/site-packages/distributed/comm/core.py", line 315, in connect await asyncio.sleep(backoff) File "/home/ubuntu/anaconda3/envs/rapids-22.08_ploomber/lib/python3.8/asyncio/tasks.py", line 659, in sleep 
return await future asyncio.exceptions.CancelledError
CPU times: user 37.9 ms, sys: 6.49 ms, total: 44.4 ms Wall time: 618 ms
from nbdev import nbdev_export
# Export the #| export cells of this notebook into the vitmtsc package.
nbdev_export()