{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmasatoshi136\u001b[0m (\u001b[33mmasa136\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "%matplotlib inline\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd \n",
    "import sys \n",
    "sys.path.append(\"../../\")\n",
    "\n",
    "import wandb\n",
    "wandb.login(host=\"https://api.wandb.ai\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "wandb version 0.17.5 is available!  To upgrade, please run:\n",
       " $ pip install wandb --upgrade"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Tracking run with wandb version 0.17.4"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run data is saved locally in <code>/raid/home/ueharam1/prj/RLfinetuning_Diffusion_Bioseq/tutorials/UTR/wandb/run-20240720_115710-5sas8jux</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Syncing run <strong><a href='https://wandb.ai/masa136/RLfinetuning_Diffusion_Bioseq-tutorials_UTR/runs/5sas8jux' target=\"_blank\">clear-valley-10</a></strong> to <a href='https://wandb.ai/masa136/RLfinetuning_Diffusion_Bioseq-tutorials_UTR' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View project at <a href='https://wandb.ai/masa136/RLfinetuning_Diffusion_Bioseq-tutorials_UTR' target=\"_blank\">https://wandb.ai/masa136/RLfinetuning_Diffusion_Bioseq-tutorials_UTR</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View run at <a href='https://wandb.ai/masa136/RLfinetuning_Diffusion_Bioseq-tutorials_UTR/runs/5sas8jux' target=\"_blank\">https://wandb.ai/masa136/RLfinetuning_Diffusion_Bioseq-tutorials_UTR/runs/5sas8jux</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[34m\u001b[1mwandb\u001b[0m: Downloading large artifact UTR-dataset:v0, 3683.51MB. 4 files... \n",
      "\u001b[34m\u001b[1mwandb\u001b[0m:   4 of 4 files downloaded.  \n",
      "Done. 0:0:7.3\n"
     ]
    }
   ],
   "source": [
    "run = wandb.init()\n",
    "artifact = run.use_artifact('fderc_diffusion/Diffusion-DNA-RNA/UTR-dataset:v0')\n",
    "dir = artifact.download()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Training "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Train conditional diffusion models p(x|y) with 3 classes of y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmasatoshi136\u001b[0m (\u001b[33mmasa136\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: wandb version 0.17.5 is available!  To upgrade, please run:\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m:  $ pip install wandb --upgrade\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.17.4\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/raid/home/ueharam1/prj/RLfinetuning_Diffusion_Bioseq/tutorials/UTR/wandb/run-20240720_115727-4khgpyb9\u001b[0m\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mdiffusion\u001b[0m\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/masa136/Example-DNA\u001b[0m\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/masa136/Example-DNA/runs/4khgpyb9\u001b[0m\n",
      "GPU available: True (cuda), used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "HPU available: False, using: 0 HPUs\n",
      "/home/ueharam1/miniconda3/envs/testgrelu/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default\n",
      "You are using a CUDA device ('NVIDIA A100-SXM4-80GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n",
      "\n",
      "  | Name  | Type                 | Params | Mode \n",
      "-------------------------------------------------------\n",
      "0 | model | ScoreNet_Conditional | 14.6 M | train\n",
      "-------------------------------------------------------\n",
      "14.6 M    Trainable params\n",
      "128       Non-trainable params\n",
      "14.6 M    Total params\n",
      "58.347    Total estimated model params size (MB)\n",
      "Epoch 0: 100%|█████████████████████| 1274/1274 [12:27<00:00,  1.70it/s, v_num=2]`Trainer.fit` stopped: `max_epochs=1` reached.\n",
      "Epoch 0: 100%|█████████████████████| 1274/1274 [12:28<00:00,  1.70it/s, v_num=2]\n"
     ]
    }
   ],
   "source": [
    "!python ../../src/lightning_diffusion.py \\\n",
    "     --class_number 3 --device 2 \\\n",
    "     --wandb_projectname Example-DNA --num_epochs 1 --wandb True --train_data 'artifacts/UTR-dataset:v0/RNA_seq.npz'\\\n",
    "     --seed_file_name 'artifacts/UTR-dataset:v0/steps400.cat4.speed_balance.time4.0.samples100000.pth'\\\n",
    "     --time_schedule 'artifacts/UTR-dataset:v0/time_dependent.npz'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Train unconditional diffusion models (class_number=1) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmasatoshi136\u001b[0m (\u001b[33mmasa136\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: wandb version 0.17.5 is available!  To upgrade, please run:\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m:  $ pip install wandb --upgrade\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.17.4\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/raid/home/ueharam1/prj/RLfinetuning_Diffusion_Bioseq/tutorials/UTR/wandb/run-20240720_121011-3rfm014u\u001b[0m\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mdiffusion\u001b[0m\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/masa136/Example-DNA\u001b[0m\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/masa136/Example-DNA/runs/3rfm014u\u001b[0m\n",
      "GPU available: True (cuda), used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "HPU available: False, using: 0 HPUs\n",
      "/home/ueharam1/miniconda3/envs/testgrelu/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default\n",
      "You are using a CUDA device ('NVIDIA A100-SXM4-80GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n",
      "\n",
      "  | Name  | Type     | Params | Mode \n",
      "-------------------------------------------\n",
      "0 | model | ScoreNet | 13.3 M | train\n",
      "-------------------------------------------\n",
      "13.3 M    Trainable params\n",
      "128       Non-trainable params\n",
      "13.3 M    Total params\n",
      "53.080    Total estimated model params size (MB)\n",
      "Epoch 0:   4%|█                      | 57/1274 [00:36<12:59,  1.56it/s, v_num=3]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 0: 100%|█████████████████████| 1274/1274 [15:37<00:00,  1.36it/s, v_num=3]`Trainer.fit` stopped: `max_epochs=1` reached.\n",
      "Epoch 0: 100%|█████████████████████| 1274/1274 [15:38<00:00,  1.36it/s, v_num=3]\n"
     ]
    }
   ],
   "source": [
    "!python ../../src/lightning_diffusion.py \\\n",
    "     --class_number 1 --device 2 \\\n",
    "     --wandb_projectname Example-DNA --num_epochs 1 --wandb True --train_data 'artifacts/UTR-dataset:v0/RNA_seq.npz'\\\n",
    "     --seed_file_name 'artifacts/UTR-dataset:v0/steps400.cat4.speed_balance.time4.0.samples100000.pth'\\\n",
    "     --time_schedule 'artifacts/UTR-dataset:v0/time_dependent.npz'"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "diffusion",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
