#!/usr/bin/env bash
#
# Start (or restart) the `vllm-medgemma` container: the vllm/vllm-openai image
# serving a locally cached MedGemma snapshot under the model name
# `medgemma-27b-text-it`, published on host port 8002.
#
# Requires: docker with GPU support, and the Hugging Face cache directory
# referenced by HF_ROOT to be present on this host.
set -euo pipefail

# Host path of the Hugging Face cache entry for the model, and the snapshot
# revision inside it that should be served.
HF_ROOT="/home/edlab/jhmoon/medgemma/models--google--medgemma-27b-text-it"
SNAP_HASH="6b08c481126ff65a9b8fa5ab4d691b152b8edb5d"

# Validate the snapshot BEFORE touching any running container. `docker run -v`
# would otherwise create a missing host path as an empty directory and the
# detached container would only fail later, inside its own logs.
snapshot_dir="${HF_ROOT}/snapshots/${SNAP_HASH}"
if [[ ! -d "${snapshot_dir}" ]]; then
  printf 'error: model snapshot directory not found: %s\n' "${snapshot_dir}" >&2
  exit 1
fi

# Remove a previous container with the same name. Errors are deliberately
# silenced and ignored: on a first run there is nothing to remove.
docker rm -f vllm-medgemma 2>/dev/null || true

# Options consumed by `docker run` itself.
docker_args=(
  -d
  --name vllm-medgemma
  --gpus all
  # Share the host IPC namespace (presumably for the shared memory used by
  # tensor-parallel workers -- confirm against the vLLM Docker docs).
  --ipc=host
  # Host port 8002 -> container port 8000.
  -p 8002:8000
  # Run as the invoking user's uid:gid instead of the image default.
  --user "$(id -u):$(id -g)"
  # NOTE(review): USER/HOME are set explicitly, presumably because the uid
  # above has no passwd entry or home directory inside the image -- confirm.
  -e USER=jhmoon
  -e HOME=/tmp
  # Expose only GPUs 2 and 3 to CUDA; two devices, matching
  # --tensor-parallel-size 2 below.
  -e CUDA_VISIBLE_DEVICES=2,3
  # Keep Hugging Face libraries off the network; the model is read from the
  # read-only mount below.
  -e HF_HUB_OFFLINE=1
  -e TRANSFORMERS_OFFLINE=1
  -e HF_HUB_CACHE=/hfmodel
  -v "${HF_ROOT}:/hfmodel:ro"
)

# Arguments passed through to the image's entrypoint (the vLLM server).
vllm_args=(
  --model "/hfmodel/snapshots/${SNAP_HASH}"
  --served-model-name medgemma-27b-text-it
  --tensor-parallel-size 2
  --gpu-memory-utilization 0.90
)

docker run "${docker_args[@]}" vllm/vllm-openai:gptoss "${vllm_args[@]}"
