{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/kaito47802/.pyenv/versions/3.11.0/envs/xray-merge/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import gc\n",
    "from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer\n",
    "from PIL import Image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00,  7.25it/s]\n"
     ]
    }
   ],
   "source": [
    "model = AutoModel.from_pretrained(\n",
    "    \"openbmb/MiniCPM-Llama3-V-2_5\",\n",
    "    torch_dtype=torch.float16,  # if cfg.fp16 else torch.float32,\n",
    "    trust_remote_code=True,\n",
    "    # device_map=\"auto\",\n",
    ").eval().to('cuda')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer = AutoTokenizer.from_pretrained(\"openbmb/MiniCPM-Llama3-V-2_5\", trust_remote_code=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "image1 = Image.open(\"data/select500cxr/0a45a24e-faad022e-df1c90bd-61365611-6f0241af.jpg\")\n",
    "image1 = image1.convert(\"RGB\")\n",
    "# question = \"あなたは優秀な放射線科医です。このレントゲン写真を見て、所見文を書いてください。肺・心臓・骨に異常があるかに注目して読影してください。\"\n",
    "question = \"あなたは優秀な予防健診医です。このレントゲン写真を見て、所見文を書いてください。肺・心臓・骨に異常があるかに注目して読影してください。\"\n",
    "## 胸部の疾患を疑って胸部レントゲンを撮影しました。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/kaito47802/.pyenv/versions/3.11.0/envs/xray-merge/lib/python3.11/site-packages/transformers/models/auto/image_processing_auto.py:513: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'レントゲン写真は肺、心臓、骨の異常を調査するために使用されます。通常、肺は透明で、肺胞の網状構造が見えます。心臓は胸骨の下に位置し、正常な大きさと位置を示しています。骨は明らかに見え、胸骨、肋骨、肩胛骨が正しい位置にあることを示しています。肺胞の網状構造が明らかで、心臓の位置が正しい場合、通常、肺・心'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "res = model.chat(\n",
    "    image=image1,\n",
    "    msgs=[{\"role\": \"user\", \"content\": question}],\n",
    "    tokenizer=tokenizer,\n",
    "    sampling=False,\n",
    "    temperature=0.7,\n",
    "    system_prompt=\"\",\n",
    "    max_new_tokens=128,\n",
    ")\n",
    "res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "image2 = Image.open(\"data/select500cxr/0b9fa49d-6b3515d2-a3ffaa9b-ae8753d4-7e269ce6.jpg\")\n",
    "image2 = image2.convert(\"RGB\")\n",
    "image3 = Image.open(\"data/select500cxr/0b14aede-e8d33df6-870058de-df17def9-f61fb024.jpg\")\n",
    "image3 = image3.convert(\"RGB\")\n",
    "images = [image1, image2, image3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from concurrent.futures import ThreadPoolExecutor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def answer(model, image):\n",
    "    output = model.chat(\n",
    "        image=image,\n",
    "        msgs=[{\"role\": \"user\", \"content\": question}],\n",
    "        tokenizer=tokenizer,\n",
    "        sampling=False,\n",
    "        temperature=0.7,\n",
    "        system_prompt=\"\",\n",
    "        max_tokens=128,\n",
    "    )\n",
    "    return output\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "executor = ThreadPoolExecutor(max_workers=3)\n",
    "futures = []\n",
    "for image in images:\n",
    "    futures.append(executor.submit(answer, image))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/kaito47802/.pyenv/versions/3.11.0/envs/xray-merge/lib/python3.11/site-packages/transformers/models/auto/image_processing_auto.py:513: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "レントゲン写真は肺、心臓、骨の異常を調査するために使用されます。通常、肺は透明で、肺胞の網状構造が見えます。心臓は胸骨の下に位置し、正常な大きさと位置を示しています。肋骨は骨密度が高く、肋骨骨折や骨折などの異常がないかどうかを調査することができます。肺胞の網状構造が明らかで、心臓の位置が正常で、肋骨の骨密度が高く、異常が見られないように見えます。ただし、放射線科医は通常、レントゲン写真を他の臨床情報と組み合わせて、正確な診断を下す必要があります。\n",
      "レントゲン写真の所見文は、放射線科医がレントゲン画像を解釈する際に使用する正式な記述です。通常、レントゲン写真の所見文には、画像中の異常や興味のある領域に関する詳細な記述が含まれます。例えば、肺の病変、心臓の異常、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、骨折、\n",
      "レントゲン写真は肺、心臓、骨の詳細な画像を提供します。肺は透明で、肺胞の網状構造が良く見えます。心臓は中央に位置し、正常な大きさと位置を示しています。骨は明らかに見え、肩胛骨、肋骨、肋骨の骨盤が良く見えます。画像には異常が見られないが、放射線科医は通常、肺胞のパターン、心臓の境界、骨の構造を調査し、異常を検出するか否かを判断します。心臓の境界や肺胞のパターンに異常がないか、骨の構造に異常がないかを確認するには、放射線科医は通常、画像を比較し、患者が以前のレントゲン写真を撮影したかどうかを調査し、臨床症状と相関関係を調査します。\n"
     ]
    }
   ],
   "source": [
    "for future in futures:\n",
    "    print(future.result())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import multiprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/kaito47802/.pyenv/versions/3.11.0/envs/xray-merge/lib/python3.11/site-packages/transformers/models/auto/image_processing_auto.py:513: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead\n",
      "  warnings.warn(\n",
      "/home/kaito47802/.pyenv/versions/3.11.0/envs/xray-merge/lib/python3.11/site-packages/transformers/models/auto/image_processing_auto.py:513: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead\n",
      "  warnings.warn(\n",
      "/home/kaito47802/.pyenv/versions/3.11.0/envs/xray-merge/lib/python3.11/site-packages/transformers/models/auto/image_processing_auto.py:513: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead\n",
      "  warnings.warn(\n",
      "/home/kaito47802/.pyenv/versions/3.11.0/envs/xray-merge/lib/python3.11/site-packages/transformers/models/auto/image_processing_auto.py:513: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[7], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m multiprocessing\u001b[38;5;241m.\u001b[39mPool(processes\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m4\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m pool:\n\u001b[0;32m----> 2\u001b[0m     results \u001b[38;5;241m=\u001b[39m \u001b[43mpool\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap\u001b[49m\u001b[43m(\u001b[49m\u001b[43manswer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mimage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mimage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mimage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mimage\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/.pyenv/versions/3.11.0/lib/python3.11/multiprocessing/pool.py:367\u001b[0m, in \u001b[0;36mPool.map\u001b[0;34m(self, func, iterable, chunksize)\u001b[0m\n\u001b[1;32m    362\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmap\u001b[39m(\u001b[38;5;28mself\u001b[39m, func, iterable, chunksize\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m    363\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m'''\u001b[39;00m\n\u001b[1;32m    364\u001b[0m \u001b[38;5;124;03m    Apply `func` to each element in `iterable`, collecting the results\u001b[39;00m\n\u001b[1;32m    365\u001b[0m \u001b[38;5;124;03m    in a list that is returned.\u001b[39;00m\n\u001b[1;32m    366\u001b[0m \u001b[38;5;124;03m    '''\u001b[39;00m\n\u001b[0;32m--> 367\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_map_async\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43miterable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapstar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchunksize\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/.pyenv/versions/3.11.0/lib/python3.11/multiprocessing/pool.py:768\u001b[0m, in \u001b[0;36mApplyResult.get\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    767\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget\u001b[39m(\u001b[38;5;28mself\u001b[39m, timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m--> 768\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    769\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mready():\n\u001b[1;32m    770\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTimeoutError\u001b[39;00m\n",
      "File \u001b[0;32m~/.pyenv/versions/3.11.0/lib/python3.11/multiprocessing/pool.py:765\u001b[0m, in \u001b[0;36mApplyResult.wait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    764\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwait\u001b[39m(\u001b[38;5;28mself\u001b[39m, timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m--> 765\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_event\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/.pyenv/versions/3.11.0/lib/python3.11/threading.py:622\u001b[0m, in \u001b[0;36mEvent.wait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    620\u001b[0m signaled \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_flag\n\u001b[1;32m    621\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m signaled:\n\u001b[0;32m--> 622\u001b[0m     signaled \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_cond\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    623\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m signaled\n",
      "File \u001b[0;32m~/.pyenv/versions/3.11.0/lib/python3.11/threading.py:320\u001b[0m, in \u001b[0;36mCondition.wait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    318\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:    \u001b[38;5;66;03m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[39;00m\n\u001b[1;32m    319\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 320\u001b[0m         \u001b[43mwaiter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43macquire\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    321\u001b[0m         gotit \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m    322\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "with multiprocessing.Pool(processes=4) as pool:\n",
    "    results = pool.map(answer, [image, image, image, image])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'results' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[5], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mresults\u001b[49m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'results' is not defined"
     ]
    }
   ],
   "source": [
    "results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MiniCPMV(\n",
       "  (llm): LlamaForCausalLM(\n",
       "    (model): LlamaModel(\n",
       "      (embed_tokens): Embedding(128256, 4096)\n",
       "      (layers): ModuleList(\n",
       "        (0-31): 32 x LlamaDecoderLayer(\n",
       "          (self_attn): LlamaSdpaAttention(\n",
       "            (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
       "            (k_proj): Linear(in_features=4096, out_features=1024, bias=False)\n",
       "            (v_proj): Linear(in_features=4096, out_features=1024, bias=False)\n",
       "            (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
       "            (rotary_emb): LlamaRotaryEmbedding()\n",
       "          )\n",
       "          (mlp): LlamaMLP(\n",
       "            (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)\n",
       "            (up_proj): Linear(in_features=4096, out_features=14336, bias=False)\n",
       "            (down_proj): Linear(in_features=14336, out_features=4096, bias=False)\n",
       "            (act_fn): SiLU()\n",
       "          )\n",
       "          (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)\n",
       "          (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)\n",
       "        )\n",
       "      )\n",
       "      (norm): LlamaRMSNorm((4096,), eps=1e-05)\n",
       "      (rotary_emb): LlamaRotaryEmbedding()\n",
       "    )\n",
       "    (lm_head): Linear(in_features=4096, out_features=128256, bias=False)\n",
       "  )\n",
       "  (vpm): Idefics2VisionTransformer(\n",
       "    (embeddings): Idefics2VisionEmbeddings(\n",
       "      (patch_embedding): Conv2d(3, 1152, kernel_size=(14, 14), stride=(14, 14), padding=valid)\n",
       "      (position_embedding): Embedding(4900, 1152)\n",
       "    )\n",
       "    (encoder): Idefics2Encoder(\n",
       "      (layers): ModuleList(\n",
       "        (0-26): 27 x Idefics2EncoderLayer(\n",
       "          (self_attn): Idefics2VisionAttention(\n",
       "            (k_proj): Linear(in_features=1152, out_features=1152, bias=True)\n",
       "            (v_proj): Linear(in_features=1152, out_features=1152, bias=True)\n",
       "            (q_proj): Linear(in_features=1152, out_features=1152, bias=True)\n",
       "            (out_proj): Linear(in_features=1152, out_features=1152, bias=True)\n",
       "          )\n",
       "          (layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)\n",
       "          (mlp): Idefics2VisionMLP(\n",
       "            (activation_fn): PytorchGELUTanh()\n",
       "            (fc1): Linear(in_features=1152, out_features=4304, bias=True)\n",
       "            (fc2): Linear(in_features=4304, out_features=1152, bias=True)\n",
       "          )\n",
       "          (layer_norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)\n",
       "        )\n",
       "      )\n",
       "    )\n",
       "    (post_layernorm): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)\n",
       "  )\n",
       "  (resampler): Resampler(\n",
       "    (kv_proj): Linear(in_features=1152, out_features=4096, bias=False)\n",
       "    (attn): MultiheadAttention(\n",
       "      (out_proj): Linear(in_features=4096, out_features=4096, bias=True)\n",
       "    )\n",
       "    (ln_q): LayerNorm((4096,), eps=1e-06, elementwise_affine=True)\n",
       "    (ln_kv): LayerNorm((4096,), eps=1e-06, elementwise_affine=True)\n",
       "    (ln_post): LayerNorm((4096,), eps=1e-06, elementwise_affine=True)\n",
       "  )\n",
       ")"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch.multiprocessing as mp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "ename": "RuntimeError",
     "evalue": "context has already been set",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[5], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mset_start_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mspawn\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/.pyenv/versions/3.11.0/lib/python3.11/multiprocessing/context.py:247\u001b[0m, in \u001b[0;36mDefaultContext.set_start_method\u001b[0;34m(self, method, force)\u001b[0m\n\u001b[1;32m    245\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mset_start_method\u001b[39m(\u001b[38;5;28mself\u001b[39m, method, force\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[1;32m    246\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_actual_context \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m force:\n\u001b[0;32m--> 247\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcontext has already been set\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m    248\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m method \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m force:\n\u001b[1;32m    249\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_actual_context \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
      "\u001b[0;31mRuntimeError\u001b[0m: context has already been set"
     ]
    }
   ],
   "source": [
    "mp.set_start_method('spawn')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def answer(output_queue, image):\n",
    "    output = model.chat(\n",
    "        image=image,\n",
    "        msgs=[{\"role\": \"user\", \"content\": question}],\n",
    "        tokenizer=tokenizer,\n",
    "        sampling=False,\n",
    "        temperature=0.7,\n",
    "        system_prompt=\"\",\n",
    "        max_tokens=128,\n",
    "    )\n",
    "    output_queue.put(output)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/kaito47802/.pyenv/versions/3.11.0/envs/xray-merge/lib/python3.11/site-packages/transformers/models/auto/image_processing_auto.py:513: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead\n",
      "  warnings.warn(\n",
      "/home/kaito47802/.pyenv/versions/3.11.0/envs/xray-merge/lib/python3.11/site-packages/transformers/models/auto/image_processing_auto.py:513: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead\n",
      "  warnings.warn(\n",
      "/home/kaito47802/.pyenv/versions/3.11.0/envs/xray-merge/lib/python3.11/site-packages/transformers/models/auto/image_processing_auto.py:513: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead\n",
      "  warnings.warn(\n",
      "/home/kaito47802/.pyenv/versions/3.11.0/envs/xray-merge/lib/python3.11/site-packages/transformers/models/auto/image_processing_auto.py:513: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "processes = []\n",
    "output_queue = mp.Queue()\n",
    "for rank in range(4):\n",
    "    p = mp.Process(target=answer, args=(output_queue, image))\n",
    "    p.start()\n",
    "    processes.append(p)\n",
    "for p in processes:\n",
    "    p.join()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "for a in return_dict:\n",
    "    print(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<multiprocessing.queues.Queue at 0x7fa3cc163e90>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "output_queue"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43moutput_queue\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/.pyenv/versions/3.11.0/lib/python3.11/multiprocessing/queues.py:103\u001b[0m, in \u001b[0;36mQueue.get\u001b[0;34m(self, block, timeout)\u001b[0m\n\u001b[1;32m    101\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m block \u001b[38;5;129;01mand\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    102\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_rlock:\n\u001b[0;32m--> 103\u001b[0m         res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_recv_bytes\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    104\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sem\u001b[38;5;241m.\u001b[39mrelease()\n\u001b[1;32m    105\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
      "File \u001b[0;32m~/.pyenv/versions/3.11.0/lib/python3.11/multiprocessing/connection.py:215\u001b[0m, in \u001b[0;36m_ConnectionBase.recv_bytes\u001b[0;34m(self, maxlength)\u001b[0m\n\u001b[1;32m    213\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m maxlength \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m maxlength \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m    214\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnegative maxlength\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 215\u001b[0m buf \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_recv_bytes\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmaxlength\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    216\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m buf \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    217\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_bad_message_length()\n",
      "File \u001b[0;32m~/.pyenv/versions/3.11.0/lib/python3.11/multiprocessing/connection.py:413\u001b[0m, in \u001b[0;36mConnection._recv_bytes\u001b[0;34m(self, maxsize)\u001b[0m\n\u001b[1;32m    412\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_recv_bytes\u001b[39m(\u001b[38;5;28mself\u001b[39m, maxsize\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m--> 413\u001b[0m     buf \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_recv\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m4\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m    414\u001b[0m     size, \u001b[38;5;241m=\u001b[39m struct\u001b[38;5;241m.\u001b[39munpack(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m!i\u001b[39m\u001b[38;5;124m\"\u001b[39m, buf\u001b[38;5;241m.\u001b[39mgetvalue())\n\u001b[1;32m    415\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m size \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m:\n",
      "File \u001b[0;32m~/.pyenv/versions/3.11.0/lib/python3.11/multiprocessing/connection.py:378\u001b[0m, in \u001b[0;36mConnection._recv\u001b[0;34m(self, size, read)\u001b[0m\n\u001b[1;32m    376\u001b[0m remaining \u001b[38;5;241m=\u001b[39m size\n\u001b[1;32m    377\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m remaining \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m--> 378\u001b[0m     chunk \u001b[38;5;241m=\u001b[39m \u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mremaining\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    379\u001b[0m     n \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(chunk)\n\u001b[1;32m    380\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m n \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "output_queue.get()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "outputs = []\n",
    "while not output_queue.empty():\n",
    "    outputs.append(output_queue.get())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "0",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[19], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43m[\u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mrange\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m      2\u001b[0m \u001b[38;5;28mprint\u001b[39m(results)\n",
      "Cell \u001b[0;32mIn[19], line 1\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[0;32m----> 1\u001b[0m results \u001b[38;5;241m=\u001b[39m [\u001b[43mreturn_dict\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m5\u001b[39m)]\n\u001b[1;32m      2\u001b[0m \u001b[38;5;28mprint\u001b[39m(results)\n",
      "File \u001b[0;32m<string>:2\u001b[0m, in \u001b[0;36m__getitem__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n",
      "File \u001b[0;32m~/.pyenv/versions/3.11.0/lib/python3.11/multiprocessing/managers.py:837\u001b[0m, in \u001b[0;36mBaseProxy._callmethod\u001b[0;34m(self, methodname, args, kwds)\u001b[0m\n\u001b[1;32m    835\u001b[0m     dispatch(conn, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdecref\u001b[39m\u001b[38;5;124m'\u001b[39m, (token\u001b[38;5;241m.\u001b[39mid,))\n\u001b[1;32m    836\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m proxy\n\u001b[0;32m--> 837\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m convert_to_error(kind, result)\n",
      "\u001b[0;31mKeyError\u001b[0m: 0"
     ]
    }
   ],
   "source": [
    "results = [return_dict[i] for i in range(5)]\n",
    "print(results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<Process name='Process-13' pid=3463700 parent=3461113 stopped exitcode=-SIGSEGV>,\n",
       " <Process name='Process-14' pid=3463701 parent=3461113 stopped exitcode=-SIGSEGV>,\n",
       " <Process name='Process-15' pid=3463702 parent=3461113 stopped exitcode=-SIGSEGV>,\n",
       " <Process name='Process-16' pid=3463703 parent=3461113 stopped exitcode=-SIGSEGV>]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "processes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-08-11 12:43:31,785\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n"
     ]
    }
   ],
   "source": [
    "from vllm import LLM, SamplingParams"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO 08-11 12:43:34 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='openbmb/MiniCPM-Llama3-V-2_5', speculative_config=None, tokenizer='openbmb/MiniCPM-Llama3-V-2_5', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=2048, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=openbmb/MiniCPM-Llama3-V-2_5, use_v2_block_manager=False, enable_prefix_caching=False)\n",
      "INFO 08-11 12:43:36 model_runner.py:720] Starting to load model openbmb/MiniCPM-Llama3-V-2_5...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/kaito47802/.pyenv/versions/3.11.0/envs/xray-merge/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:211: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n",
      "  @torch.library.impl_abstract(\"xformers_flash::flash_fwd\")\n",
      "/home/kaito47802/.pyenv/versions/3.11.0/envs/xray-merge/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:344: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n",
      "  @torch.library.impl_abstract(\"xformers_flash::flash_bwd\")\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO 08-11 12:43:38 weight_utils.py:225] Using model weights format ['*.safetensors']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading safetensors checkpoint shards:   0% Completed | 0/7 [00:00<?, ?it/s]\n",
      "Loading safetensors checkpoint shards:  14% Completed | 1/7 [00:00<00:02,  2.89it/s]\n",
      "Loading safetensors checkpoint shards:  29% Completed | 2/7 [00:00<00:02,  2.39it/s]\n",
      "Loading safetensors checkpoint shards:  43% Completed | 3/7 [00:01<00:01,  2.32it/s]\n",
      "Loading safetensors checkpoint shards:  57% Completed | 4/7 [00:01<00:01,  2.25it/s]\n",
      "Loading safetensors checkpoint shards:  71% Completed | 5/7 [00:02<00:00,  2.23it/s]\n",
      "Loading safetensors checkpoint shards:  86% Completed | 6/7 [00:02<00:00,  2.21it/s]\n",
      "Loading safetensors checkpoint shards: 100% Completed | 7/7 [00:03<00:00,  2.20it/s]\n",
      "Loading safetensors checkpoint shards: 100% Completed | 7/7 [00:03<00:00,  2.25it/s]\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO 08-11 12:43:42 model_runner.py:732] Loading model weights took 15.9516 GB\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/kaito47802/.pyenv/versions/3.11.0/envs/xray-merge/lib/python3.11/site-packages/transformers/models/auto/image_processing_auto.py:513: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO 08-11 12:43:45 gpu_executor.py:102] # GPU blocks: 31513, # CPU blocks: 2048\n",
      "INFO 08-11 12:43:48 model_runner.py:1024] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.\n",
      "INFO 08-11 12:43:48 model_runner.py:1028] CUDA graphs can take additional 1~3 GiB memory per GPU. If you are running out of memory, consider decreasing `gpu_memory_utilization` or enforcing eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.\n",
      "INFO 08-11 12:44:04 model_runner.py:1225] Graph capturing finished in 16 secs.\n"
     ]
    }
   ],
   "source": [
    "llm = LLM(\n",
    "    model=\"openbmb/MiniCPM-Llama3-V-2_5\",\n",
    "    trust_remote_code=True,\n",
    "    gpu_memory_utilization=1,\n",
    "    max_model_len=2048\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<vllm.entrypoints.llm.LLM at 0x7f2300fca350>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "llm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "xray-merge",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
