{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# https://huggingface.co/blog/stable_diffusion\n",
    "import random\n",
    "import sys\n",
    "from pathlib import Path\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import torch\n",
    "from PIL import Image\n",
    "\n",
    "# Put the parent directory on sys.path so the `src` package below can be imported.\n",
    "sys.path.append(\"..\")\n",
    "from src.dataset.vision import forward_transform, inverse_transform\n",
    "from src.model.components.stable_diffusion_autoencoder import StableDiffusionAutoEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sanity check: push one sample through the forward transform and back again.\n",
    "source_image = Image.open(\"/data/vtt/states/0010785c87799aca_9_0.jpg\").convert(\"RGB\")\n",
    "\n",
    "to_model_input = forward_transform(mode=\"stable_diffusion\")\n",
    "image = to_model_input(source_image).cuda()\n",
    "print(image.shape)\n",
    "\n",
    "# Undo the transform on a CPU copy and display the result as the cell output.\n",
    "to_displayable = inverse_transform(mode=\"stable_diffusion\")\n",
    "raw_image = to_displayable(image.cpu())\n",
    "raw_image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the autoencoder, call .cuda() on it, and print its c_embed attribute.\n",
    "autoencoder = StableDiffusionAutoEncoder()\n",
    "vae = autoencoder.cuda()\n",
    "print(vae.c_embed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def reconstruct_image(vae, image_path):\n",
    "    \"\"\"Encode an image with ``vae``, decode it again, and plot both side by side.\n",
    "\n",
    "    The left panel is the input after a forward/inverse transform round trip rather\n",
    "    than the untouched file, so both panels pass through the same inverse transform.\n",
    "    The shapes of the transformed image and of the latent ``z`` are printed.\n",
    "\n",
    "    Args:\n",
    "        vae: Object exposing ``encode`` and ``decode``. It receives CUDA tensors,\n",
    "            so it is presumably expected to live on the GPU already.\n",
    "        image_path: Path of the image file to reconstruct.\n",
    "    \"\"\"\n",
    "    to_model_input = forward_transform(mode=\"stable_diffusion\")\n",
    "    to_displayable = inverse_transform(mode=\"stable_diffusion\")\n",
    "\n",
    "    # Close the file handle deterministically and force three channels: without the\n",
    "    # conversion, grayscale/CMYK/palette files reach the transform in their native mode.\n",
    "    with Image.open(image_path) as source:\n",
    "        raw_image = source.convert(\"RGB\")\n",
    "\n",
    "    image = to_model_input(raw_image).cuda()\n",
    "    raw_image = to_displayable(image.cpu())\n",
    "    print(f\"image: {image.shape}\")\n",
    "\n",
    "    # Inference only: disable autograd so no computation graph is recorded.\n",
    "    with torch.no_grad():\n",
    "        z = vae.encode(image.unsqueeze(0))\n",
    "        print(f\"z: {z.shape}\")\n",
    "        reconstructed_image = vae.decode(z).squeeze(0).cpu()\n",
    "    reconstructed_image = to_displayable(reconstructed_image)\n",
    "\n",
    "    fig, axes = plt.subplots(1, 2, figsize=(10, 5))\n",
    "    panels = ((raw_image, \"Original\"), (reconstructed_image, \"Reconstructed\"))\n",
    "    for ax, (picture, title) in zip(axes, panels):\n",
    "        ax.imshow(picture)\n",
    "        ax.set_title(title)\n",
    "        ax.axis(\"off\")\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sort the matches: glob order is not guaranteed, so without sorting\n",
    "# states[0] could be a different file between runs or machines.\n",
    "states = sorted(Path(\"/data/vtt/states\").glob(\"*.jpg\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reconstruct the first entry of the state list.\n",
    "first_state = states[0]\n",
    "reconstruct_image(vae, first_state)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reconstruct a randomly chosen state image.\n",
    "sample_path = random.choice(states)\n",
    "reconstruct_image(vae, sample_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reconstruct another randomly chosen state image.\n",
    "sample_path = random.choice(states)\n",
    "reconstruct_image(vae, sample_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reconstruct one more randomly chosen state image.\n",
    "sample_path = random.choice(states)\n",
    "reconstruct_image(vae, sample_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  },
  "vscode": {
   "interpreter": {
    "hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
