#!/bin/bash
#SBATCH --partition=YOUR_PARTITION
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=72
#SBATCH --gres=gpu:0
#SBATCH --job-name=streaming_eval
#SBATCH --mem=256GB
#SBATCH --time=0-23:59:59
#SBATCH --qos=YOUR_QOS
#SBATCH --output=log/%j.out
#SBATCH --error=log/%j.err

# Root directory of the checkpoint library. run() resolves virtualenvs as
# "$CHECKPOINT_DIR/envs/<name>/bin/activate".
# NOTE(review): the value looks like a placeholder -- confirm it is replaced
# with a real path before submitting the job.
CHECKPOINT_DIR='/path/to/checkpoint_lib'

# Run python inside the virtualenv selected by $ENV.
#
# Globals:
#   ENV            (read) name of the environment directory under
#                  "$CHECKPOINT_DIR/envs"; must be non-empty.
#   CHECKPOINT_DIR (read) directory that contains envs/<name>/bin/activate.
# Arguments:
#   "$@" - forwarded verbatim to python.
# Outputs:
#   Prints "ENV: <name>" to stdout; diagnostics go to stderr.
# Returns:
#   1 if ENV is empty or the activate script is missing, the status of
#   `source` if activation itself fails, otherwise python's exit status.
run() {
    if [[ -z "${ENV:-}" ]]; then
        printf 'run: ENV must name an environment under %s/envs\n' "$CHECKPOINT_DIR" >&2
        return 1
    fi
    printf 'ENV: %s\n' "$ENV"

    local activate="$CHECKPOINT_DIR/envs/$ENV/bin/activate"
    if [[ ! -f "$activate" ]]; then
        printf 'run: activate script not found: %s\n' "$activate" >&2
        return 1
    fi

    # The script is sourced into the current shell (not a subshell), so
    # whatever it sets remains in effect after run returns.
    # Bail out on failure: continuing would start python from whatever
    # interpreter is already on PATH instead of the requested environment.
    source "$activate" || return
    python "$@"
}

# Run python inside the conda environment named by $ENV.
#
# Globals:
#   ENV (read) conda environment name passed to `conda run -n`; must be
#       non-empty.
# Arguments:
#   "$@" - forwarded verbatim to python.
# Outputs:
#   Prints "ENV: <name>" to stdout; diagnostics go to stderr.
# Returns:
#   1 if ENV is empty, otherwise the exit status of `conda run`.
conda_run() {
    if [[ -z "${ENV:-}" ]]; then
        printf 'conda_run: ENV must name a conda environment\n' >&2
        return 1
    fi
    # Quoted so a name containing whitespace or glob characters is logged
    # exactly as given.
    printf 'ENV: %s\n' "$ENV"
    # --no-capture-output: per the conda CLI docs this streams the child's
    # stdout/stderr live instead of buffering it.
    conda run -n "$ENV" --no-capture-output python "$@"
}

# Launch streaming_inference_batch.py through run() with ENV=qwen3_vl, i.e.
# inside the virtualenv at "$CHECKPOINT_DIR/envs/qwen3_vl". The ENV=... prefix
# applies to this one call only.
# NOTE(review): --gpu-per-model-backend 0 together with --gres=gpu:0 suggests
# the model backend does not use local GPUs; confirm that
# --cuda-visible-devices is intentionally still passed.
ENV=qwen3_vl run streaming_inference_batch.py --model-name doubao_seed \
    --benchmarks "omnistreaming_v2" \
    --prompts "streaming" \
    --cuda-visible-devices 0,1,2,3,4,5,6,7 \
    --gpu-per-model-backend 0 \
    --n-model-backend 128 \
    --batch-size 1 \
    --n-actors-alive 128 \
    --output-dir outputs \
    --load-modules model_backend scheduler
status=$?

# Barrier for any background jobs started above (none in the current form).
# A bare `wait` returns 0, so without the saved status the batch job would
# exit 0 even when the evaluation failed.
wait
exit "$status"