{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6133ed20",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cuda\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "\n",
    "import copy\n",
    "import numpy as np\n",
    "\n",
    "import torch.optim as optim\n",
    "\n",
    "if torch.cuda.is_available():\n",
    "    device = torch.device('cuda')\n",
    "else : \n",
    "    device = torch.device('cpu')\n",
    "print(device)\n",
    "\n",
    "from torch.nn import functional as F\n",
    "\n",
    "from torch.utils.data import DataLoader, TensorDataset\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "import os\n",
    "import random\n",
    "import dataset.aug_index as ai # data augmentation and some index for reconstruction\n",
    "import NSC_module.modules_decoder as dec"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6618ef63",
   "metadata": {},
   "outputs": [],
   "source": [
    "from lightning.pytorch import Trainer, seed_everything # used for seed setting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "148405c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "class VectorQuantizer(nn.Module):\n",
    "    def __init__(self, num_embeddings=16, embedding_dim=4, commitment_cost=0.25):\n",
    "        super().__init__()\n",
    "        self.embedding_dim = embedding_dim\n",
    "        self.num_embeddings = num_embeddings\n",
    "        self.commitment_cost = commitment_cost\n",
    "\n",
    "        # Codebook\n",
    "        self.embedding = nn.Embedding(num_embeddings, embedding_dim)\n",
    "        self.embedding.weight.data.uniform_(-1/num_embeddings, 1/num_embeddings)\n",
    "\n",
    "    def forward(self, z):\n",
    "        z_flat = z.view(-1, self.embedding_dim)  # [B, D]\n",
    "\n",
    "        distances = (\n",
    "            torch.sum(z_flat**2, dim=1, keepdim=True)\n",
    "            - 2 * torch.matmul(z_flat, self.embedding.weight.t())\n",
    "            + torch.sum(self.embedding.weight**2, dim=1)\n",
    "        )  # [B, num_embeddings]\n",
    "\n",
    "        encoding_indices = torch.argmin(distances, dim=1).unsqueeze(1)  # [B, 1]\n",
    "\n",
    "        encodings = torch.zeros(encoding_indices.size(0), self.num_embeddings, device=z.device)\n",
    "        encodings.scatter_(1, encoding_indices, 1)\n",
    "\n",
    "        quantized = torch.matmul(encodings, self.embedding.weight)  # [B, D]\n",
    "        quantized = quantized.view_as(z)\n",
    "\n",
    "        e_latent_loss = F.mse_loss(quantized.detach(), z)\n",
    "        q_latent_loss = F.mse_loss(quantized, z.detach())\n",
    "        loss = q_latent_loss + self.commitment_cost * e_latent_loss\n",
    "\n",
    "        quantized = z + (quantized - z).detach()\n",
    "        return quantized, loss, encoding_indices.view(z.size(0))\n",
    "\n",
    "class VQVAE(nn.Module):\n",
    "    \"\"\"Autoencoder whose latent vector is snapped onto a learned codebook.\n",
    "\n",
    "    The encoder is one linear layer followed by ReLU, the bottleneck is a\n",
    "    ``VectorQuantizer`` and the decoder is ``dec.Base_decoder``.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, input_dim=128, latent_dim=4, num_embeddings=16, commitment_cost=0.25):\n",
    "        super().__init__()\n",
    "        # Sub-modules are built in encoder -> vq -> decoder order.\n",
    "        projection = nn.Linear(input_dim, latent_dim)\n",
    "        self.encoder = nn.Sequential(projection, nn.ReLU())\n",
    "        self.vq = VectorQuantizer(\n",
    "            num_embeddings=num_embeddings,\n",
    "            embedding_dim=latent_dim,\n",
    "            commitment_cost=commitment_cost,\n",
    "        )\n",
    "        self.decoder = dec.Base_decoder(latent_dim=latent_dim)\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Encode, quantize and decode ``x``.\n",
    "\n",
    "        Returns:\n",
    "            ``(x_recon, vq_loss, z_q)``: the decoder output, the quantizer\n",
    "            loss and the quantized latent that was fed to the decoder.\n",
    "        \"\"\"\n",
    "        latent = self.encoder(x)\n",
    "        quantized, vq_loss, _indices = self.vq(latent)\n",
    "        reconstruction = self.decoder(quantized)\n",
    "        return reconstruction, vq_loss, quantized\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "8476ab4d",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Seed set to 1\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "20170622\n",
      "20170623\n",
      "20170629\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.\n",
      "GPU available: True (cuda), used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "HPU available: False, using: 0 HPUs\n",
      "Seed set to 2\n",
      "💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.\n",
      "GPU available: True (cuda), used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "HPU available: False, using: 0 HPUs\n",
      "Seed set to 3\n",
      "💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.\n",
      "GPU available: True (cuda), used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "HPU available: False, using: 0 HPUs\n",
      "Seed set to 4\n",
      "💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.\n",
      "GPU available: True (cuda), used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "HPU available: False, using: 0 HPUs\n",
      "Seed set to 5\n",
      "💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.\n",
      "GPU available: True (cuda), used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "HPU available: False, using: 0 HPUs\n",
      "Seed set to 1\n",
      "💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.\n",
      "GPU available: True (cuda), used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "HPU available: False, using: 0 HPUs\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "d533101\n",
      "d561106\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Seed set to 2\n",
      "💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.\n",
      "GPU available: True (cuda), used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "HPU available: False, using: 0 HPUs\n",
      "Seed set to 3\n",
      "💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.\n",
      "GPU available: True (cuda), used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "HPU available: False, using: 0 HPUs\n",
      "Seed set to 4\n",
      "💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.\n",
      "GPU available: True (cuda), used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "HPU available: False, using: 0 HPUs\n",
      "Seed set to 5\n",
      "💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.\n",
      "GPU available: True (cuda), used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "HPU available: False, using: 0 HPUs\n"
     ]
    }
   ],
   "source": [
    "# Training / evaluation sweep: one VQ-VAE per (dataset group, seed) pair.\n",
    "#\n",
    "# For every dataset group the recordings are concatenated; for every seed the data\n",
    "# is split 70/10/20 into train/valid/test, a model is trained, the weights with the\n",
    "# best validation mid-window PSNR are kept, and test-set metrics are collected.\n",
    "\n",
    "cr = 32\n",
    "seed_list = [1, 2, 3, 4, 5]\n",
    "file_list = [[\"20170622\", \"20170623\", \"20170629\"], [\"d533101\", \"d561106\"]]\n",
    "\n",
    "results = {}\n",
    "\n",
    "train_epochs = 100\n",
    "bs = 64          # mini-batch size of all three loaders\n",
    "clip_norm = 10   # gradient-norm ceiling applied before every optimizer step\n",
    "mid = slice(32, 96)  # sample window scored by the '*_mid' metrics\n",
    "\n",
    "# Metric functions from dataset.aug_index, in the order used for the result keys.\n",
    "metric_fns = {'psnr': ai.psnr, 'r2': ai.r2_score, 'sndr': ai.sndr, 'nrmse': ai.nrmse}\n",
    "metric_names = list(metric_fns) + [name + '_mid' for name in metric_fns]\n",
    "\n",
    "# Create the checkpoint folder up front so torch.save cannot fail after a full run.\n",
    "os.makedirs(\"./compare_pt\", exist_ok=True)\n",
    "\n",
    "for choice in range(len(file_list)):\n",
    "\n",
    "    results[f'{choice}'] = {}\n",
    "\n",
    "    # Per-seed test-set means, one list per metric.\n",
    "    seed_means = {name: [] for name in metric_names}\n",
    "\n",
    "    files = file_list[choice]\n",
    "    X_list = []\n",
    "    y_list = []\n",
    "    cl_list = []\n",
    "    for i, stem in enumerate(files):\n",
    "        print(stem)\n",
    "\n",
    "        tX = np.load('./dataset/' + stem + '_wave.npy')\n",
    "        ty = np.load('./dataset/' + stem + '_neo.npy')\n",
    "\n",
    "        X_list.append(tX)\n",
    "        y_list.append(ty)\n",
    "        cl_list.append(np.full(len(tX), i))  # index of the source file for every row\n",
    "\n",
    "    X = np.concatenate(X_list, axis=0)\n",
    "    y = np.concatenate(y_list, axis=0)\n",
    "    CL = np.concatenate(cl_list, axis=0)\n",
    "\n",
    "    for se in range(len(seed_list)):\n",
    "\n",
    "        # Output files are tagged with the seed's position `se`, not the seed value.\n",
    "        save_name = \"./compare_pt/VQ_VAE_SEED_\" + str(se) + \"_CHOICE_\" + str(choice) + \".pt\"\n",
    "\n",
    "        seed = seed_list[se]\n",
    "        seed_everything(seed, workers=True)\n",
    "\n",
    "        # Datasets: 20% test, then 12.5% of the remainder (10% overall) for validation.\n",
    "        X_tmp, X_test, y_tmp, y_test, _, cl_test = train_test_split(X, y, CL, test_size=0.2)\n",
    "        X_train, X_valid, y_train, y_valid = train_test_split(X_tmp, y_tmp, test_size=0.125)\n",
    "\n",
    "        np.save(str(choice) + str(se) + \"vqvae_cl.npy\", np.array(cl_test))\n",
    "\n",
    "        X_train = torch.tensor(X_train, dtype=torch.float32)\n",
    "        X_valid = torch.tensor(X_valid, dtype=torch.float32)\n",
    "        X_test  = torch.tensor(X_test , dtype=torch.float32)\n",
    "\n",
    "        y_train = torch.tensor(y_train, dtype=torch.float32)\n",
    "        y_valid = torch.tensor(y_valid, dtype=torch.float32)\n",
    "        y_test  = torch.tensor(y_test , dtype=torch.float32)\n",
    "\n",
    "        train_dataset = TensorDataset(X_train, y_train)\n",
    "        valid_dataset = TensorDataset(X_valid, y_valid)\n",
    "        test_dataset  = TensorDataset(X_test, y_test)\n",
    "\n",
    "        train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True)\n",
    "        valid_loader = DataLoader(valid_dataset, batch_size=bs, shuffle=False)\n",
    "        test_loader  = DataLoader(test_dataset, batch_size=bs, shuffle=False)\n",
    "\n",
    "        model = VQVAE(latent_dim=int(128/cr)).to(device)\n",
    "\n",
    "        # The Trainer is never used for fitting; presumably it is built only for the\n",
    "        # determinism settings that `deterministic=True` applies -- TODO confirm.\n",
    "        trainer = Trainer(deterministic=True)\n",
    "\n",
    "        optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)\n",
    "\n",
    "        best_psnr = -np.inf\n",
    "        # Start from this run's initial weights so that a checkpoint of THIS model\n",
    "        # always exists, even if no epoch improves on -inf (e.g. NaN PSNR). Without\n",
    "        # it the previous seed's weights would be saved, or a NameError raised.\n",
    "        best_model_state = copy.deepcopy(model.state_dict())\n",
    "\n",
    "        for epoch in range(train_epochs):\n",
    "            model.train()\n",
    "            for (inputs, inputs_neo) in train_loader:\n",
    "                inputs = inputs.to(device)\n",
    "\n",
    "                optimizer.zero_grad()\n",
    "                outputs, vq_loss, _ = model(inputs)\n",
    "\n",
    "                # Reconstruction error plus the quantizer loss.\n",
    "                loss = F.mse_loss(outputs, inputs) + vq_loss\n",
    "\n",
    "                loss.backward()\n",
    "                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=clip_norm)\n",
    "                optimizer.step()\n",
    "\n",
    "            model.eval()\n",
    "            all_recons = []\n",
    "            all_targets = []\n",
    "\n",
    "            with torch.no_grad():\n",
    "                for (inputs, inputs_neo) in valid_loader:\n",
    "                    inputs = inputs.to(device)\n",
    "\n",
    "                    outputs, _, _ = model(inputs)\n",
    "                    # extend() stores one numpy row per sample.\n",
    "                    all_targets.extend(inputs.cpu().numpy())\n",
    "                    all_recons.extend(outputs.cpu().numpy())\n",
    "\n",
    "            # Model selection: mean PSNR over the mid window of the validation set.\n",
    "            psnr_mid_valv = sum(\n",
    "                ai.psnr(recon[mid], target[mid]) for recon, target in zip(all_recons, all_targets)\n",
    "            ) / len(all_recons)\n",
    "            if psnr_mid_valv > best_psnr:\n",
    "                best_psnr = psnr_mid_valv\n",
    "                best_model_state = copy.deepcopy(model.state_dict())\n",
    "\n",
    "        torch.save(best_model_state, save_name)\n",
    "\n",
    "        # Evaluate exactly the checkpoint that was written to disk.\n",
    "        best_model_state = torch.load(save_name)\n",
    "        model.load_state_dict(best_model_state)\n",
    "\n",
    "        o_list = []\n",
    "        i_list = []\n",
    "        z_list = []\n",
    "\n",
    "        model.eval()\n",
    "        with torch.no_grad():\n",
    "            for (inputs, inputs_neo) in test_loader:\n",
    "                inputs = inputs.to(device)\n",
    "                outputs, _, z_q = model(inputs)\n",
    "                i_list.extend(inputs.cpu().numpy())\n",
    "                o_list.extend(outputs.cpu().numpy())\n",
    "                # The latent file must hold the quantized latents z_q, not reconstructions.\n",
    "                z_list.extend(z_q.cpu().numpy())\n",
    "\n",
    "        np.save(str(choice) + str(se) + \"vqvae_target.npy\", np.array(i_list))\n",
    "        np.save(str(choice) + str(se) + \"vqvae_recon.npy\", np.array(o_list))\n",
    "        np.save(str(choice) + str(se) + \"vqvae_latent.npy\", np.array(z_list))\n",
    "\n",
    "        # Per-sample metrics on the full signal and on the mid window.\n",
    "        per_sample = {name: [] for name in metric_names}\n",
    "        for recon, target in zip(o_list, i_list):\n",
    "            for name, fn in metric_fns.items():\n",
    "                per_sample[name].append(fn(recon, target))\n",
    "                per_sample[name + '_mid'].append(fn(recon[mid], target[mid]))\n",
    "\n",
    "        for name in metric_names:\n",
    "            seed_means[name].append(np.array(per_sample[name]).mean())\n",
    "\n",
    "    # Aggregate over seeds; keys are written in mean / std / best groups.\n",
    "    summary = results[f'{choice}']\n",
    "    per_seed = {name: np.array(values) for name, values in seed_means.items()}\n",
    "\n",
    "    for name in metric_names:\n",
    "        summary[name + '_mean'] = per_seed[name].mean()\n",
    "\n",
    "    for name in metric_names:\n",
    "        summary[name + '_std'] = per_seed[name].std()\n",
    "\n",
    "    for name in metric_names:\n",
    "        # NRMSE is the only metric whose best value is taken as the minimum.\n",
    "        if name.startswith('nrmse'):\n",
    "            summary[name + '_best'] = per_seed[name].min()\n",
    "        else:\n",
    "            summary[name + '_best'] = per_seed[name].max()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "d4049196",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'0': {'psnr_mean': np.float32(18.628345), 'r2_mean': np.float32(0.31987792), 'sndr_mean': np.float32(3.339148), 'nrmse_mean': np.float32(0.12833653), 'psnr_mid_mean': np.float32(21.760239), 'r2_mid_mean': np.float32(0.6441327), 'sndr_mid_mean': np.float32(9.29225), 'nrmse_mid_mean': np.float32(0.10498979), 'psnr_std': np.float32(1.5862216), 'r2_std': np.float32(0.243066), 'sndr_std': np.float32(1.6018305), 'nrmse_std': np.float32(0.023474826), 'psnr_mid_std': np.float32(4.06411), 'r2_mid_std': np.float32(0.21749629), 'sndr_mid_std': np.float32(4.0684743), 'nrmse_mid_std': np.float32(0.047020625), 'psnr_best': np.float32(21.339273), 'r2_best': np.float32(0.6877347), 'sndr_best': np.float32(6.077328), 'nrmse_best': np.float32(0.090827905), 'psnr_mid_best': np.float32(25.779186), 'r2_mid_best': np.float32(0.9168928), 'sndr_mid_best': np.float32(13.338286), 'nrmse_mid_best': np.float32(0.058571603)}, '1': {'psnr_mean': np.float32(17.286888), 'r2_mean': np.float32(0.12248941), 'sndr_mean': np.float32(1.2306697), 'nrmse_mean': np.float32(0.14629035), 'psnr_mid_mean': np.float32(18.813417), 'r2_mid_mean': np.float32(0.58767956), 'sndr_mid_mean': np.float32(4.879726), 'nrmse_mid_mean': np.float32(0.122784875), 'psnr_std': np.float32(0.7920221), 'r2_std': np.float32(0.16916534), 'sndr_std': np.float32(0.8029306), 'nrmse_std': np.float32(0.013902737), 'psnr_mid_std': np.float32(0.8087437), 'r2_mid_std': np.float32(0.079805136), 'sndr_mid_std': np.float32(0.81261307), 'nrmse_mid_std': np.float32(0.011876838), 'psnr_best': np.float32(18.554195), 'r2_best': np.float32(0.3705621), 'sndr_best': np.float32(2.5173583), 'nrmse_best': np.float32(0.124916926), 'psnr_mid_best': np.float32(19.753036), 'r2_mid_best': np.float32(0.6747783), 'sndr_mid_best': np.float32(5.8205705), 'nrmse_mid_best': np.float32(0.10924562)}}\n"
     ]
    }
   ],
   "source": [
    "# Aggregated mean / std / best of every metric, keyed by dataset-group index.\n",
    "print(results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "b6408cfb-1882-46ef-90d8-3297f86a30b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make sure the output folder exists before writing the text summary.\n",
    "os.makedirs(\"./compare_result\", exist_ok=True)\n",
    "with open(\"./compare_result/vqvae.txt\", \"w\", encoding=\"utf-8\") as f:\n",
    "    f.write(str(results))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c83c4c16-d9db-4a90-abb4-debb49c3c008",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
