#!/bin/bash

# Launch the vLLM OpenAI-compatible API server for a single model.
#
# Usage: run this script directly; it takes no arguments. To serve a
# different model, change the `model` assignment below.
# Flag semantics are defined by vLLM — consult its engine-argument
# documentation before changing any of the values.

# Abort on command failure, on use of an unset variable, and on failure of
# any pipeline stage.
set -euo pipefail

model="meta-llama/Meta-Llama-3.1-8B-Instruct"
#model="Qwen/Qwen2-7B"

# Server arguments are kept in an array so every value reaches python3 as
# exactly one word (no word-splitting or globbing on "$model") and so there
# are no fragile backslash line continuations to maintain.
server_args=(
  --model "$model"
  --port 8000
  -tp 1
  --max-model-len 10000
  --max-num-seqs 1
  --enforce-eager
  --disable-log-stats
  --disable-log-requests
  --gpu-memory-utilization 0.8
)

# Disabled by the original author: an environment-variable prefix for the
# command below. NOTE(review): presumably enables metric logging to the named
# file — confirm before re-enabling.
# METRIC_LOG=metric_a30_prefill_7B.log \
python3 -m vllm.entrypoints.openai.api_server "${server_args[@]}"
