{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#| default_exp feature_preprocessing.face_detection.tabular_to_timeseries\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "# Declare the list of upstream tasks whose products this notebook uses as inputs.\n",
    "upstream = ['feature_preprocessing_face_detection']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#| hide\n",
    "from nbdev.showdoc import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "from vitmtsc import *\n",
    "from vitmtsc.core import *\n",
    "from vitmtsc.data.face_detection import *\n",
    "from vitmtsc.feature_preprocessing.face_detection.target_encoding import *\n",
    "import os\n",
    "import glob"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "# Resolved pipeline parameters; the same values are stored in this notebook's\n",
    "# metadata.papermill.parameters.\n",
    "# `upstream` maps the upstream task name to its products; `product` lists this notebook's outputs.\n",
    "# This rebinds `upstream`, which the tagged `parameters` cell declared as a list of task names.\n",
    "# NOTE(review): the paths are absolute (/home/ubuntu/...) and this cell carries `#| export`, so\n",
    "# they are written into the generated module as well -- confirm that is intended.\n",
    "upstream = {\n",
    "    \"feature_preprocessing_face_detection\": {\n",
    "        \"nb\": \"/home/ubuntu/vitmtsc_nbdev/output/201_feature_preprocessing.face_detection.target_encoding.html\",\n",
    "        \"FaceDetection_TRAIN_TE\": \"/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding/train\",\n",
    "        \"FaceDetection_VALID_TE\": \"/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding/valid\",\n",
    "        \"FaceDetection_TEST_TE\": \"/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding/test\",\n",
    "        \"FaceDetection_workflow_dir\": \"/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding/nvtabular_workflow\",\n",
    "    }\n",
    "}\n",
    "product = {\n",
    "    \"nb\": \"/home/ubuntu/vitmtsc_nbdev/output/301_feature_preprocessing.face_detection.tabular_to_timeseries.html\",\n",
    "    \"FaceDetection_TRAIN_MODEL_INPUT\": \"/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding-nn/train\",\n",
    "    \"FaceDetection_VALID_MODEL_INPUT\": \"/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding-nn/valid\",\n",
    "    \"FaceDetection_TEST_MODEL_INPUT\": \"/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding-nn/test\",\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Feature Preprocessing for Neural Networks - III\n",
    "\n",
    "> Convert the target-encoded (category-encoded) data from tabular to time-series format"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dask.distributed import Client\n",
    "from dask_cuda import LocalCUDACluster\n",
    "\n",
    "# Start a local dask-cuda cluster and connect a client to it; `client` is shut down in the\n",
    "# cleanup cell near the end of the notebook.\n",
    "# NOTE(review): per the dask-cuda docs a float device_memory_limit is a fraction of total GPU\n",
    "# memory (the spill threshold), and rmm_pool_size / rmm_managed_memory configure the RMM\n",
    "# allocator on each worker -- confirm against the RAPIDS 22.08 environment this kernel uses.\n",
    "cluster = LocalCUDACluster(memory_limit='auto', device_memory_limit=0.5, rmm_pool_size='20GB', rmm_managed_memory=True)\n",
    "client = Client(cluster)\n",
    "client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "# Dataset-level constants for the FaceDetection conversion.\n",
    "DATASET_NAME = 'FaceDetection'\n",
    "# Passed as `seq_len` to convert_from_tabular_to_timeseries_format.\n",
    "SEQUENCE_LENGTH = 62\n",
    "# Passed as `number_of_features`; equals the number of dim_* columns in MTSC_COLUMN_NAMES.\n",
    "NUMBER_OF_FEATURES = 144\n",
    "# NOTE(review): presumably the number of target classes; not referenced elsewhere in this notebook.\n",
    "NUM_TARGET = 2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "__Convert from Tabular to Time-Series Format__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "# Feature column names 'dim_0', 'dim_1', ..., one per feature.\n",
    "# Derived from NUMBER_OF_FEATURES (144 -> 'dim_0' .. 'dim_143') so the list cannot drift from\n",
    "# the constant that is passed to the conversion helper as `number_of_features`.\n",
    "MTSC_COLUMN_NAMES = [f'dim_{i}' for i in range(NUMBER_OF_FEATURES)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "# Column layout handed to the conversion helper as `all_columns`: three leading columns,\n",
    "# then every dim_* feature column, then 'class_vals' last.\n",
    "ALL_COLUMNS = ['case_id', 'case_id_seq', 'reading_id', *MTSC_COLUMN_NAMES, 'class_vals']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "__Input Data Location__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input directories: the train/valid/test products of the upstream target-encoding task.\n",
    "# os.path.join discards the leading './' component when the second argument is an absolute\n",
    "# path, as the values configured in `upstream` are, so these resolve to the absolute paths.\n",
    "target_encoded_train_dir = os.path.join(\"./\", upstream['feature_preprocessing_face_detection']['FaceDetection_TRAIN_TE'])\n",
    "target_encoded_valid_dir = os.path.join(\"./\", upstream['feature_preprocessing_face_detection']['FaceDetection_VALID_TE'])\n",
    "target_encoded_test_dir = os.path.join(\"./\", upstream['feature_preprocessing_face_detection']['FaceDetection_TEST_TE'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "__Output Data Location__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Output directories: the train/valid/test entries of this notebook's `product` mapping.\n",
    "# os.path.join discards the leading './' component when the second argument is an absolute\n",
    "# path, as the values configured in `product` are, so these resolve to the absolute paths.\n",
    "output_train_dir = os.path.join(\"./\", product['FaceDetection_TRAIN_MODEL_INPUT'])\n",
    "output_valid_dir = os.path.join(\"./\", product['FaceDetection_VALID_MODEL_INPUT'])\n",
    "output_test_dir = os.path.join(\"./\", product['FaceDetection_TEST_MODEL_INPUT'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the output directories in-process instead of shelling out to `mkdir -p`, so paths\n",
    "# containing spaces or shell metacharacters are handled correctly and a failure raises\n",
    "# instead of only printing. exist_ok=True makes re-running the cell a no-op.\n",
    "for out_dir in (output_train_dir, output_valid_dir, output_test_dir):\n",
    "    os.makedirs(out_dir, exist_ok=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## <center> Train Dataset Conversion </center>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "__Tabular to Time-Series format conversion__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "# Train split: input_dir is the upstream target-encoded train directory, output_dir is the\n",
    "# train product directory of this notebook.\n",
    "# NOTE(review): convert_from_tabular_to_timeseries_format is not defined in this notebook (it\n",
    "# arrives through the star imports); check there what chunk_size_processing and\n",
    "# chunk_size_file control.\n",
    "convert_from_tabular_to_timeseries_format(input_dir = target_encoded_train_dir, \n",
    "                                          output_dir = output_train_dir, \n",
    "                                          all_columns = ALL_COLUMNS,\n",
    "                                          mtsc_column_names = MTSC_COLUMN_NAMES,\n",
    "                                          chunk_size_processing = 50000,\n",
    "                                          number_of_features = NUMBER_OF_FEATURES, \n",
    "                                          seq_len = SEQUENCE_LENGTH,\n",
    "                                          chunk_size_file = 10000)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## <center> Valid Dataset Conversion </center>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "__Tabular to Time-Series format conversion__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "# Validation split: input_dir is the upstream target-encoded valid directory, output_dir is\n",
    "# the valid product directory of this notebook.\n",
    "# NOTE(review): convert_from_tabular_to_timeseries_format is not defined in this notebook (it\n",
    "# arrives through the star imports); check there what chunk_size_processing and\n",
    "# chunk_size_file control.\n",
    "convert_from_tabular_to_timeseries_format(input_dir = target_encoded_valid_dir, \n",
    "                                          output_dir = output_valid_dir, \n",
    "                                          all_columns = ALL_COLUMNS,\n",
    "                                          mtsc_column_names = MTSC_COLUMN_NAMES,\n",
    "                                          chunk_size_processing = 50000,\n",
    "                                          number_of_features = NUMBER_OF_FEATURES, \n",
    "                                          seq_len = SEQUENCE_LENGTH,\n",
    "                                          chunk_size_file = 10000)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## <center> Test Dataset Conversion </center>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "__Tabular to Time-Series format conversion__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "# Test split: input_dir is the upstream target-encoded test directory, output_dir is the\n",
    "# test product directory of this notebook.\n",
    "# NOTE(review): convert_from_tabular_to_timeseries_format is not defined in this notebook (it\n",
    "# arrives through the star imports); check there what chunk_size_processing and\n",
    "# chunk_size_file control.\n",
    "convert_from_tabular_to_timeseries_format(input_dir = target_encoded_test_dir, \n",
    "                                          output_dir = output_test_dir, \n",
    "                                          all_columns = ALL_COLUMNS,\n",
    "                                          mtsc_column_names = MTSC_COLUMN_NAMES,\n",
    "                                          chunk_size_processing = 50000,\n",
    "                                          number_of_features = NUMBER_OF_FEATURES, \n",
    "                                          seq_len = SEQUENCE_LENGTH,\n",
    "                                          chunk_size_file = 10000)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## <center> Verify Datasets </center>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "import dask_cudf\n",
    "train_gdf = dask_cudf.read_parquet(output_train_dir)\n",
    "train_gdf.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_gdf.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "train_gdf['case_id'].nunique().compute(), train_gdf['class_vals'].nunique().compute()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "import dask_cudf\n",
    "valid_gdf = dask_cudf.read_parquet(output_valid_dir)\n",
    "valid_gdf.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "valid_gdf.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "valid_gdf['case_id'].nunique().compute(), valid_gdf['class_vals'].nunique().compute()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "import dask_cudf\n",
    "test_gdf = dask_cudf.read_parquet(output_test_dir)\n",
    "test_gdf.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_gdf.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "test_gdf['case_id'].nunique().compute(), test_gdf['class_vals'].nunique().compute()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "__Shut down the Dask cluster and close the client__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "client.shutdown()\n",
    "client.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nbdev import nbdev_export\n",
    "nbdev_export()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "rapids-22.08_ploomber",
   "language": "python",
   "name": "rapids-22.08_ploomber"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  },
  "papermill": {
   "environment_variables": {},
   "parameters": {
    "product": {
     "FaceDetection_TEST_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding-nn/test",
     "FaceDetection_TRAIN_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding-nn/train",
     "FaceDetection_VALID_MODEL_INPUT": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding-nn/valid",
     "nb": "/home/ubuntu/vitmtsc_nbdev/output/301_feature_preprocessing.face_detection.tabular_to_timeseries.html"
    },
    "upstream": {
     "feature_preprocessing_face_detection": {
      "FaceDetection_TEST_TE": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding/test",
      "FaceDetection_TRAIN_TE": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding/train",
      "FaceDetection_VALID_TE": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding/valid",
      "FaceDetection_workflow_dir": "/home/ubuntu/vitmtsc_nbdev/output/FaceDetection/target_encoding/nvtabular_workflow",
      "nb": "/home/ubuntu/vitmtsc_nbdev/output/201_feature_preprocessing.face_detection.target_encoding.html"
     }
    }
   },
   "version": null
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
