from openai import OpenAI

# Point the client to your vLLM server endpoint.
# Replace 192.168.2.10 with your server's hostname or IP address if needed.
# "EMPTY" is a placeholder API key. NOTE(review): this presumably works because
# the server was launched without an API-key requirement -- confirm against
# your vLLM launch options.
client = OpenAI(base_url="http://192.168.2.10:8000/v1", api_key="EMPTY")

# Send a chat-style query: one system prompt followed by a single user turn.
response = client.chat.completions.create(
    # NOTE(review): the model is identified by a snapshot path here; it must
    # match the name the server registered the model under -- verify.
    model="/.cache/huggingface/hub/models--Qwen--Qwen3-30B-A3B-Thinking-2507-FP8/snapshots/60d80c83c53c3b611c642dbb8c942b3f90c5948a",  # or the name you used to serve
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Explain why the sky appears blue during daytime."},
    ],
    temperature=0.7,
)

# Only the first choice is shown.
reply = response.choices[0].message.content

# Format with an f-string rather than passing two arguments to print():
# print's default separator (" ") would otherwise insert a stray space after
# the newline and indent the first line of the reply.
print(f"Model response:\n{reply}")