{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.2.2+cu121\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "# Print the installed PyTorch build so the environment is recorded with the notebook.\n",
    "print(torch.__version__)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "67d38e7b-067d-4ec6-aaf4-58c4d9d2d004"
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "327d87f1-cfb2-489f-aa06-98b1eb0b5d2e",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Order GPUs by PCI bus id and expose only device 7 to this process.\n",
    "# NOTE(review): presumably this has to run before the first CUDA call to take effect -- confirm.\n",
    "os.environ.update({\n",
    "    \"CUDA_DEVICE_ORDER\": \"PCI_BUS_ID\",\n",
    "    \"CUDA_VISIBLE_DEVICES\": \"7\",\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d1383b40-1b0e-4cba-a27a-16e0c68f7493",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of CUDA devices PyTorch reports for this process.\n",
    "torch.cuda.device_count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "62918c4b-8573-477d-8071-ea18231d0e2e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GPU is available\n"
     ]
    }
   ],
   "source": [
    "# Report whether PyTorch can use a CUDA device.\n",
    "print(\"GPU is available\" if torch.cuda.is_available() else \"GPU is not available\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a9e0d790-0c55-491c-8224-48230370df00",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/minhnh/python_venv/nlp/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
      "2024-04-05 09:03:49.778936: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
      "2024-04-05 09:03:49.778995: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
      "2024-04-05 09:03:49.779720: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
      "2024-04-05 09:03:49.785924: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
      "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2024-04-05 09:03:50.990070: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
     ]
    }
   ],
   "source": [
    "from datasets import load_dataset, Dataset, DatasetDict\n",
    "import pandas as pd\n",
    "from huggingface_hub.hf_api import HfFolder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "7deeda1f-055c-47b6-9ffa-434ef6027ee3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import getpass\n",
    "\n",
    "# Never commit an access token to the notebook: take it from the HF_TOKEN\n",
    "# environment variable, or prompt for it (without echo) when the variable is unset.\n",
    "hf_token = os.environ.get(\"HF_TOKEN\") or getpass.getpass(\"Hugging Face token: \")\n",
    "HfFolder.save_token(hf_token)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "7523200d-80ea-41bd-a5ae-c80e286914ba",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/minhnh/python_venv/nlp/lib/python3.9/site-packages/datasets/load.py:2483: FutureWarning: 'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.\n",
      "You can remove this warning by passing 'token=<use_auth_token>' instead.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "# `token=True` uses the locally saved Hugging Face token; it replaces the deprecated\n",
    "# `use_auth_token` argument (deprecated since datasets 2.14, removed in 3.0).\n",
    "dataset = load_dataset(\"TeeA/text2sql_vi\", token=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "71e78237-81d2-4ae0-85ea-58fc1f1dc60c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'schema_syll': 'CREATE TABLE lab(subject_id text,hadm_id text,itemid text,charttime text,flag text,value_unit text,label text,fluid text) CREATE TABLE thủ tục(subject_id text,hadm_id text,icd9_code text,short_title text,long_title text) CREATE TABLE nhân khẩu học(subject_id text,hadm_id text,name text,marital_status text,age text,dob text,giới tính text,ngôn ngữ text,tôn giáo text,loại_nhập viện text,ngày_ở text,bảo hiểm text,dân tộc text,hết hạn_cờ text,vị trí_nhập viện text,vị trí xuất viện text,chẩn đoán text,dod text,dob_year text,dod_year text,thời gian nhập viện text,dischtime text,admityear text) CREATE TABLE đơn thuốc(subject_id text,hadm_id text,icustay_id text,drug_type text,drug text,formulary_drug_cd text,route text,drug_dose text) CREATE TABLE chẩn đoán(subject_id text,hadm_id text,icd9_code text,short_title text,long_title text)',\n",
       " 'schema_word': 'CREATE TABLE lab(subject_id text,hadm_id text,itemid text,charttime text,flag text,value_unit text,label text,fluid text) CREATE TABLE thủ_tục(subject_id text,hadm_id text,icd9_code text,short_title text,long_title text) CREATE TABLE nhân_khẩu học(subject_id text,hadm_id text,name text,marital_status text,age text,dob text,giới_tính text,ngôn_ngữ text,tôn_giáo text,loại_nhập_viện text,ngày_ở text,bảo_hiểm text,dân_tộc text,hết hạn_cờ text,vị trí_nhập_viện text,vị_trí xuất_viện text,chẩn_đoán text,dod text,dob_year text,dod_year text,thời_gian nhập_viện text,dischtime text,admityear text) CREATE TABLE đơn thuốc(subject_id text,hadm_id text,icustay_id text,drug_type text,drug text,formulary_drug_cd text,route text,drug_dose text) CREATE TABLE chẩn_đoán(subject_id text,hadm_id text,icd9_code text,short_title text,long_title text)',\n",
       " 'query_syll': 'SELECT COUNT(DISTINCTnhân khẩu học.subject_id) FROM nhân khẩu học INNER JOIN thủ tục ON nhân khẩu học.hadm_id = thủ tục.hadm_id WHERE nhân khẩu học.gender = \"F\" AND thủ tục.long_title = \"chuyển nhịp nhĩ\"',\n",
       " 'source': 'mimicsql_data',\n",
       " 'question_syll': 'cho tôi xem số lượng bệnh nhân nữ đã trải qua chuyển nhịp nhĩ.',\n",
       " 'question_word': 'cho tôi xem số_lượng bệnh_nhân nữ đã trải qua chuyển nhịp nhĩ .',\n",
       " 'query_word': 'SELECT COUNT( DISTINCT nhân_khẩu học.subject_id) FROM nhân_khẩu học INNER JOIN thủ_tục ON nhân_khẩu học.hadm_id = thủ_tục.hadm_id WHERE nhân_khẩu học.gender = \"F\" AND thủ_tục.long_title = \"chuyển nhịp nhĩ\"'}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the first record of the train split.\n",
    "dataset[\"train\"][0]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "baeb045d-90bd-4c58-a121-098d798cb3a3",
   "metadata": {},
   "source": [
    "**Meaning level** — embed each question with PhoBERT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "25018252-6008-47df-acf4-353e58072f39",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of RobertaModel were not initialized from the model checkpoint at vinai/phobert-base-v2 and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "from transformers import AutoModel, AutoTokenizer\n",
    "\n",
    "# Tokenizer and encoder come from the same checkpoint.\n",
    "checkpoint = \"vinai/phobert-base-v2\"\n",
    "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
    "# device_map=\"auto\" leaves the device placement of the weights to the library.\n",
    "phobert = AutoModel.from_pretrained(checkpoint, device_map=\"auto\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "10cffd76-06d1-4fad-9765-87156975eae7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Questions from the train split (the `question_syll` field).\n",
    "instruction = dataset[\"train\"][\"question_syll\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "7feffab0-f855-400f-a684-c53bb3765474",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Re-check CUDA availability.\n",
    "torch.cuda.is_available()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "c096afda-b557-43eb-8a45-f1cfc8efe006",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 243964/243964 [27:39<00:00, 146.98it/s]\n"
     ]
    }
   ],
   "source": [
    "from tqdm import tqdm\n",
    "from underthesea import word_tokenize\n",
    "\n",
    "# Send inputs to the device the model was actually placed on (it is loaded with\n",
    "# device_map=\"auto\") instead of assuming it is always cuda:0.\n",
    "device = phobert.device\n",
    "\n",
    "# NOTE(review): the complete model output is kept for every question and is not moved off\n",
    "# the model's device here; a later cell slices it and copies it to the CPU.\n",
    "list_emb = []\n",
    "for i, question in enumerate(tqdm(instruction)):\n",
    "    # Batch of one: token ids of a single question, truncated to at most 254 ids.\n",
    "    input_ids = torch.tensor([tokenizer.encode(question, truncation=True, max_length=254)])\n",
    "\n",
    "    # Inference only, so no gradients are tracked.\n",
    "    with torch.no_grad():\n",
    "        features = phobert(input_ids.to(device))\n",
    "    list_emb.append((i, features))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "2ff3a5fd-2c91-4095-920b-d05fe2e36c87",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [
    {
     "ename": "UnpicklingError",
     "evalue": "pickle data was truncated",
     "output_type": "error",
     "traceback": [
      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[0;31mUnpicklingError\u001B[0m                           Traceback (most recent call last)",
      "Cell \u001B[0;32mIn[22], line 4\u001B[0m\n\u001B[1;32m      1\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mpickle\u001B[39;00m\n\u001B[1;32m      3\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m (\u001B[38;5;28mopen\u001B[39m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mmy_array.pickle\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mrb\u001B[39m\u001B[38;5;124m\"\u001B[39m)) \u001B[38;5;28;01mas\u001B[39;00m openfile:\n\u001B[0;32m----> 4\u001B[0m     test_list \u001B[38;5;241m=\u001B[39m \u001B[43mpickle\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mload\u001B[49m\u001B[43m(\u001B[49m\u001B[43mopenfile\u001B[49m\u001B[43m)\u001B[49m\n",
      "\u001B[0;31mUnpicklingError\u001B[0m: pickle data was truncated"
     ]
    }
   ],
   "source": [
    "import pickle\n",
    "\n",
    "# NOTE(review): \"my_array.pickle\" is written by the pickle.dump cell further down, so on a\n",
    "# fresh kernel this cell only works once that cell has finished writing the file.\n",
    "# pickle.load can execute arbitrary code -- only load files you produced yourself.\n",
    "with open(\"my_array.pickle\", \"rb\") as openfile:\n",
    "    test_list = pickle.load(openfile)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "outputs": [],
   "source": [
    "# From the first element of each model output, take position 0 along the second axis and\n",
    "# copy it to the CPU as a NumPy array.\n",
    "# NOTE(review): presumably this is the hidden state of the leading <s> token -- confirm.\n",
    "list_emb_1 = [\n",
    "    (idx, output[0][:, 0, :].cpu().numpy())\n",
    "    for idx, output in list_emb\n",
    "]\n",
    "# Drop the leading axis (size 1: each question was encoded on its own).\n",
    "list_emb_2 = [(idx, vector.squeeze(0)) for idx, vector in list_emb_1]"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "04e5690b-5360-461a-8f5b-861a103bc43c"
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "# Serialize the (index, embedding) pairs in list_emb_2 to my_array.pickle.\n",
    "with open(\"my_array.pickle\", mode=\"wb\") as f:\n",
    "    pickle.dump(list_emb_2, f)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "c36a0bad-43e3-4960-b9d8-92fb75cdb15e"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ea2ed808-cc70-46a3-bc6b-4642c29692e7",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import json\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "class NumpyEncoder(json.JSONEncoder):\n",
    "    \"\"\"JSON encoder that serializes NumPy arrays as nested lists.\"\"\"\n",
    "\n",
    "    def default(self, obj):\n",
    "        \"\"\"Return ``obj.tolist()`` for arrays; defer to the base encoder otherwise.\"\"\"\n",
    "        if not isinstance(obj, np.ndarray):\n",
    "            return super().default(obj)\n",
    "        return obj.tolist()\n",
    "\n",
    "\n",
    "# One record per question: its position in `instruction` and its embedding.\n",
    "list_data = [{'index': i, 'emb': emb} for i, emb in list_emb_2]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "85ecb0e7-65f4-47d9-882a-e8c2d16c8ec8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tabulate the records: one row per question, with columns `index` and `emb`.\n",
    "df = pd.DataFrame(data=list_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "a9420889-ae65-4c87-a72d-aefd947f4a58",
   "metadata": {},
   "outputs": [],
   "source": [
    "from datasets import Dataset\n",
    "# NOTE(review): this rebinds `dataset`, which earlier cells use for the loaded text2sql data;\n",
    "# re-running those cells after this one will see the embeddings dataset instead.\n",
    "dataset = Dataset.from_pandas(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "28917c9d-cf69-44fe-9a50-5293d109ec3d",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Uploading the dataset shards:   0%|          | 0/2 [00:00<?, ?it/s]\n",
      "Creating parquet from Arrow format:   0%|          | 0/122 [00:00<?, ?ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:   3%|▎         | 4/122 [00:00<00:03, 31.00ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:   7%|▋         | 8/122 [00:00<00:07, 15.88ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  10%|▉         | 12/122 [00:00<00:05, 20.35ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  13%|█▎        | 16/122 [00:00<00:04, 23.61ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  16%|█▌        | 19/122 [00:00<00:04, 24.63ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  19%|█▉        | 23/122 [00:00<00:03, 26.96ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  21%|██▏       | 26/122 [00:01<00:03, 27.20ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  24%|██▍       | 29/122 [00:01<00:04, 20.77ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  27%|██▋       | 33/122 [00:01<00:03, 23.83ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  30%|███       | 37/122 [00:01<00:03, 25.36ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  34%|███▎      | 41/122 [00:01<00:02, 27.25ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  37%|███▋      | 45/122 [00:01<00:02, 28.41ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  40%|████      | 49/122 [00:01<00:02, 29.57ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  43%|████▎     | 53/122 [00:02<00:02, 30.11ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  47%|████▋     | 57/122 [00:02<00:02, 30.80ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  50%|█████     | 61/122 [00:02<00:01, 31.41ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  53%|█████▎    | 65/122 [00:02<00:01, 29.48ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  57%|█████▋    | 69/122 [00:02<00:01, 30.17ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  60%|█████▉    | 73/122 [00:02<00:01, 30.42ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  63%|██████▎   | 77/122 [00:02<00:01, 29.97ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  66%|██████▋   | 81/122 [00:02<00:01, 29.74ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  70%|██████▉   | 85/122 [00:03<00:01, 30.42ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  73%|███████▎  | 89/122 [00:03<00:01, 30.70ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  76%|███████▌  | 93/122 [00:03<00:00, 29.54ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  80%|███████▉  | 97/122 [00:03<00:00, 30.11ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  83%|████████▎ | 101/122 [00:03<00:00, 30.67ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  86%|████████▌ | 105/122 [00:03<00:00, 31.29ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  89%|████████▉ | 109/122 [00:03<00:00, 31.02ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  93%|█████████▎| 113/122 [00:04<00:00, 31.22ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  96%|█████████▌| 117/122 [00:04<00:00, 30.94ba/s]\u001B[A\n",
      "Creating parquet from Arrow format: 100%|██████████| 122/122 [00:04<00:00, 28.32ba/s]\u001B[A\n",
      "Uploading the dataset shards:  50%|█████     | 1/2 [00:19<00:19, 19.62s/it]\n",
      "Creating parquet from Arrow format:   0%|          | 0/122 [00:00<?, ?ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:   3%|▎         | 4/122 [00:00<00:03, 30.60ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:   7%|▋         | 8/122 [00:00<00:03, 29.71ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  10%|▉         | 12/122 [00:00<00:03, 30.50ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  13%|█▎        | 16/122 [00:00<00:03, 31.45ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  16%|█▋        | 20/122 [00:00<00:03, 31.40ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  20%|█▉        | 24/122 [00:00<00:03, 31.66ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  23%|██▎       | 28/122 [00:00<00:02, 32.19ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  26%|██▌       | 32/122 [00:01<00:02, 32.38ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  30%|██▉       | 36/122 [00:01<00:02, 32.21ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  33%|███▎      | 40/122 [00:01<00:02, 31.81ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  36%|███▌      | 44/122 [00:01<00:02, 31.89ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  39%|███▉      | 48/122 [00:01<00:02, 32.22ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  43%|████▎     | 52/122 [00:01<00:02, 32.36ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  46%|████▌     | 56/122 [00:01<00:02, 32.17ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  49%|████▉     | 60/122 [00:01<00:01, 32.46ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  52%|█████▏    | 64/122 [00:01<00:01, 32.96ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  56%|█████▌    | 68/122 [00:02<00:01, 33.31ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  59%|█████▉    | 72/122 [00:02<00:01, 33.20ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  62%|██████▏   | 76/122 [00:02<00:01, 31.49ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  66%|██████▌   | 80/122 [00:02<00:01, 31.62ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  69%|██████▉   | 84/122 [00:02<00:01, 32.13ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  72%|███████▏  | 88/122 [00:02<00:01, 32.37ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  75%|███████▌  | 92/122 [00:02<00:00, 31.89ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  79%|███████▊  | 96/122 [00:03<00:00, 31.70ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  82%|████████▏ | 100/122 [00:03<00:00, 31.72ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  85%|████████▌ | 104/122 [00:03<00:00, 32.64ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  89%|████████▊ | 108/122 [00:03<00:00, 33.34ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  92%|█████████▏| 112/122 [00:03<00:00, 34.92ba/s]\u001B[A\n",
      "Creating parquet from Arrow format:  95%|█████████▌| 116/122 [00:03<00:00, 36.01ba/s]\u001B[A\n",
      "Creating parquet from Arrow format: 100%|██████████| 122/122 [00:03<00:00, 32.70ba/s]\u001B[A\n",
      "Uploading the dataset shards: 100%|██████████| 2/2 [00:35<00:00, 17.59s/it]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "CommitInfo(commit_url='https://huggingface.co/datasets/hoangphu7122002ai/phobert_t2sql_embedding_syll/commit/593caa3f281b361f58401b497341e2ea9b213469', commit_message='Upload dataset', commit_description='', oid='593caa3f281b361f58401b497341e2ea9b213469', pr_url=None, pr_revision=None, pr_num=None)"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Upload the embeddings dataset to the Hugging Face Hub repository named below.\n",
    "dataset.push_to_hub(repo_id=\"hoangphu7122002ai/phobert_t2sql_embedding_syll\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "NLP",
   "language": "python",
   "name": "nlp"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
