import json
import shutil
import subprocess
import time

import requests

from process import process_all_files

# Number of launch -> request -> collect rounds performed by the loop below.
N = 50

# The model id and the port each appear in both the request settings and the
# server launch command; defining them once keeps the two sides in agreement.
_MODEL = "qwen3_8b_vllm/Qwen/Qwen3-8B"
_PORT = "6666"

# Chat-completions endpoint the request is posted to.
# NOTE(review): presumably 33.33.33.127 is this machine's own address, since
# the server is spawned locally from `cmd` -- confirm.
url = "http://33.33.33.127:" + _PORT + "/v1/chat/completions"
headers = {"Content-Type": "application/json"}

# Request body. The prompt asks (in Chinese) for the model's favourite poem
# and why it likes it; the completion is capped at 8 tokens.
data = dict(
    model=_MODEL,
    messages=[
        {"role": "user", "content": "请你介绍一下你最喜欢的诗歌，并解释为什么喜欢它。"},
    ],
    temperature=0.7,
    max_tokens=8,
)

# Argument vector used to start the vLLM OpenAI-compatible API server.
cmd = ["uv", "run", "python", "-m", "vllm.entrypoints.openai.api_server"]
cmd += ["--model", _MODEL]
cmd += ["--tensor-parallel-size", "2"]
cmd += ["--gpu-memory-utilization", "0.90"]
cmd += ["--port", _PORT]
cmd += ["--enforce-eager"]
cmd += ["--dtype", "half"]

# Pause between readiness probes so the retry loop does not spin a CPU core
# and flood the socket while the server is still starting up.
POLL_INTERVAL_S = 1.0
# (connect, read) timeouts for each probe; without them a wedged server would
# block the script forever. The read timeout is deliberately generous.
REQUEST_TIMEOUT_S = (5, 600)
# How long to wait for a graceful exit after SIGTERM before force-killing.
SHUTDOWN_GRACE_S = 60

# Start from an empty output directory; it may not exist on the first run.
shutil.rmtree("output/", ignore_errors=True)
for i in range(N):
    # Each round uses a freshly started server process.
    process = subprocess.Popen(cmd)
    try:
        # Keep posting the request until the server answers with HTTP 200.
        while True:
            # If the server died (bad flags, out of memory, port in use, ...)
            # retrying can never succeed, so fail loudly instead of looping.
            exit_code = process.poll()
            if exit_code is not None:
                raise RuntimeError(
                    f"server exited with code {exit_code} before answering "
                    f"the request (iteration {i})"
                )
            try:
                response = requests.post(
                    url,
                    headers=headers,
                    data=json.dumps(data),
                    timeout=REQUEST_TIMEOUT_S,
                )
            except requests.RequestException:
                # Connection refused / timed out: the server is not ready yet.
                # Only transport errors are retried; anything else propagates.
                response = None
            if response is not None and response.status_code == 200:
                break
            time.sleep(POLL_INTERVAL_S)
    finally:
        # Always stop the server, even when the loop above raised, and wait
        # for it to exit so the port is free before the next iteration starts
        # another server on it and no zombie process is left behind.
        process.terminate()
        try:
            process.wait(timeout=SHUTDOWN_GRACE_S)
        except subprocess.TimeoutExpired:
            process.kill()
            process.wait()

    print("状态码:", response.status_code)
    print("返回内容:", response.json())
    # Convert whatever was collected in output/ into this round's trace
    # directory, then clear output/ for the next round.
    process_all_files("output/", f"dataset/vllm_online/trace_{i}", "vllm_online", is_chrome=False)
    shutil.rmtree("output/")