# Run inference_data.py on the train_sample_1k.parquet file.
# The flag meanings are defined by inference_data.py itself (not shown here).
python3 inference_data.py --hosts 0.0.0.0 \
    --input-files ../data/std_format/train_sample_1k.parquet \
    --port 30000 --num-workers 24 --sample-num 100


import requests

# Send one chat-completion request to the /v1/chat/completions endpoint and
# print the decoded JSON reply.
url = "http://10.33.0.33:30000/v1/chat/completions"

data = {
    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "messages": [{"role": "user", "content": "What is the capital of France?"}],
}

# requests has no default timeout, so an unresponsive server would block this
# script forever. The tuple is (connect, read) in seconds; the read limit is
# generous because the reply only arrives once generation has finished.
response = requests.post(url, json=data, timeout=(10, 600))
print(response.json())


import subprocess, json

# Send a chat-completion request by running curl as a subprocess, then print
# the parsed JSON reply.
payload = {
    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "messages": [{"role": "user", "content": "What is the capital of France?"}],
}

# An argument list (no shell) sidesteps shell quoting entirely, and json.dumps
# builds the request body instead of hand-escaped braces inside an f-string.
curl_command = [
    "curl",
    "-s",  # silent: keep the progress meter out of the captured output
    "http://10.33.0.33:30000/v1/chat/completions",
    "-H",
    "Content-Type: application/json",
    "-d",
    json.dumps(payload),
]

# check_output raises CalledProcessError if curl exits with a non-zero status.
response = json.loads(subprocess.check_output(curl_command))
print(response)




# POST a single prompt with its sampling parameters to the /generate endpoint.
curl http://10.25.0.230:30000/generate \
  -H "Content-Type: application/json" \
  -d '{ "text": "已知复数x满足x + 1/x = 1，求x^2016 + 1/(x^2016)的值", "sampling_params": { "max_new_tokens": 2048, "temperature": 1.0 } }'
