#!/bin/bash

# Require exactly three positional arguments: <model_choice> <port_number>
# <gpu_number>. On any other count, print the usage text to stderr and abort
# with status 1. The ranges listed here mirror the validation performed below.
if [[ $# -ne 3 ]]; then
    {
        echo "Usage: bash ${0##*/} <model_choice> <port_number> <gpu_number>"
        echo "  <model_choice>: 1 for Qwen2.5-7B, 2 for Qwen2.5-3B, 3 for Llama-3.2-3B, 4 for Llama-3.1-8B"
        echo "  <port_number>: offset 0-10 added to 8000 (e.g., 0 for port 8000, 1 for port 8001)"
        echo "  <gpu_number>: GPU index between 0 and 7"
    } >&2
    exit 1
fi

# Model selection: map the numeric <model_choice> ($1) to a Hugging Face model
# ID stored in MODEL. A string match is used instead of `[ ... -eq ... ]` so
# that non-numeric input is rejected cleanly rather than emitting
# "integer expression expected" errors for every comparison.
case "$1" in
    1) MODEL="Qwen/Qwen2.5-7B-Instruct" ;;
    2) MODEL="Qwen/Qwen2.5-3B-Instruct" ;;
    3) MODEL="meta-llama/Llama-3.2-3B-Instruct" ;;
    4) MODEL="meta-llama/Llama-3.1-8B-Instruct" ;;
    *)
        # List every accepted choice so the message matches the cases above.
        echo "❌ Invalid model choice! Choose 1 (Qwen2.5-7B), 2 (Qwen2.5-3B), 3 (Llama-3.2-3B) or 4 (Llama-3.1-8B)." >&2
        exit 1
        ;;
esac

# Port assignment (8000 + port number)
# <port_number> ($2) must be a plain decimal integer between 0 and 10.
# The regex check comes first because `[[ x -ge y ]]` and `$(( ))` evaluate
# their operands as arithmetic expressions: non-numeric input such as "foo" or
# an empty string would silently count as 0, and crafted input could be
# evaluated as code.
if [[ "$2" =~ ^[0-9]{1,2}$ ]] && (( 10#$2 <= 10 )); then
    # 10# forces base 10 so inputs with a leading zero (e.g. "08") are not
    # rejected as invalid octal numbers.
    PORT=$((8000 + 10#$2))
else
    echo "❌ Invalid port number! Choose a value between 0 and 10 (which maps to ports 8000-8010)." >&2
    exit 1
fi

# GPU assignment
# <gpu_number> ($3) must be a single digit 0-7. A regex is used rather than
# `[[ x -ge y ]]` because the latter evaluates its operands as arithmetic
# expressions, so a non-numeric value would pass as 0 and then be exported
# verbatim in CUDA_VISIBLE_DEVICES.
if [[ "$3" =~ ^[0-7]$ ]]; then
    GPU_NUMBER=$3
else
    echo "❌ Invalid GPU number! Choose a value between 0 and 7." >&2
    exit 1
fi

# Launch the vLLM OpenAI-compatible API server for the selected model, with
# only the chosen GPU exposed to the process via CUDA_VISIBLE_DEVICES.
# Expansions are quoted so each value is always passed as exactly one argument.
echo "🚀 Starting vLLM server for $MODEL on port $PORT using GPU $GPU_NUMBER..."
CUDA_VISIBLE_DEVICES="$GPU_NUMBER" python -m vllm.entrypoints.openai.api_server \
    --model "$MODEL" \
    --port "$PORT"


# Example of a fully hard-coded launch, kept for reference only. It is
# commented out because, as live code, it would start a second server
# (ignoring the command-line arguments) as soon as the server launched above
# exits. Copy it to a terminal to run it manually.
#
# CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
#     --model Qwen/Qwen2.5-3B-Instruct \
#     --port 8001 \
#     --max-model-len 32768 \
#     --max_num_seqs 1024 \
#     --trust-remote-code