{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from diffusers import StableDiffusionPipeline, UNet2DConditionModel, AutoencoderKL, PNDMScheduler\n",
    "from transformers import T5EncoderModel, T5TokenizerFast\n",
    "import torch\n",
    "import os\n",
    "import numpy as np\n",
    "import librosa\n",
    "from tqdm import tqdm\n",
    "import soundfile as sf\n",
    "import torchaudio\n",
    "from IPython.display import Audio\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "torch_device = torch.device(\"cuda:0\")\n",
    "check_point = \"430000\"\n",
    "save_path = \"/blob/v-xxxx/AUDITDATA/AUDIT_INFER\"\n",
    "num_inference_steps = 100\n",
    "guidance_scale= 7.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_path = \"/blob/v-xxxx/AudioEditingModel/Diffusion_SG/checkpoint-350000\"\n",
    "vae = AutoencoderKL.from_pretrained(model_path, subfolder=\"vae\")\n",
    "unet = UNet2DConditionModel.from_pretrained(\"/blob/v-xxxx/AUDITPLUS/AUDIT_G_0/checkpoint-{}\".format(check_point))\n",
    "tokenizer = T5TokenizerFast.from_pretrained(model_path, subfolder=\"tokenizer\")\n",
    "text_encoder = T5EncoderModel.from_pretrained(model_path, subfolder=\"text_encoder\")\n",
    "\n",
    "vae.to(torch_device)\n",
    "text_encoder.to(torch_device)\n",
    "unet.to(torch_device)\n",
    "vae.requires_grad_(False)\n",
    "unet.requires_grad_(False)\n",
    "text_encoder.requires_grad_(False)"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
